﻿// Provides a C++11 implementation of a multi-producer, multi-consumer lock-free queue.
// An overview, including benchmark results, is provided here:
//     http://moodycamel.com/blog/2014/a-fast-general-purpose-lock-free-queue-for-c++
// The full design is also described in excruciating detail at:
//    http://moodycamel.com/blog/2014/detailed-design-of-a-lock-free-queue

// Simplified BSD license:
// Copyright (c) 2013-2020, Cameron Desrochers.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// - Redistributions of source code must retain the above copyright notice, this list of
// conditions and the following disclaimer.
// - Redistributions in binary form must reproduce the above copyright notice, this list of
// conditions and the following disclaimer in the documentation and/or other materials
// provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
// OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
// TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Also dual-licensed under the Boost Software License (see LICENSE.md)

#pragma once
#if defined(__GNUC__)&&!defined(__INTEL_COMPILER)
// Disable -Wconversion warnings (spuriously triggered when Traits::u_index and
// Traits::index_t are set to < 32 bits, causing integer promotion, causing warnings
// upon assigning any computed values)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion"

#ifdef MCDBGQ_USE_RELACY
#pragma GCC diagnostic ignored "-Wint-to-pointer-cast"
#endif
#endif
#if defined(_MSC_VER)&&(!defined(_HAS_CXX17)||!_HAS_CXX17)
// VS2019 with /W4 warns about constant conditional expressions, but unless /std:c++17 or higher
// is used it does not support `if constexpr`, so we have no choice but to simply disable the warning
#pragma warning(push)
#pragma warning(disable: 4127)  // conditional expression is constant
#endif
#if defined(__APPLE__)
#include "TargetConditionals.h"
#endif
#ifdef MCDBGQ_USE_RELACY
																														#include "relacy/relacy_std.hpp"
#include "relacy_shims.h"
// We only use malloc/free anyway, and the delete macro messes up `= delete` method declarations.
// We'll override the default trait malloc ourselves without a macro.
#undef new
#undef delete
#undef malloc
#undef free
#else
#include <atomic>        // Requires C++11. Sorry VS2010.
#include <cassert>

#endif
#include <cstddef>              // for max_align_t
#include <cstdint>
#include <cstdlib>
#include <type_traits>
#include <algorithm>
#include <utility>
#include <limits>
#include <climits>        // for CHAR_BIT
#include <array>
#include <thread>        // partly for __WINPTHREADS_VERSION if on MinGW-w64 w/ POSIX threading
#include <mutex>        // used for thread exit synchronization

// Platform-specific definitions of a numeric thread ID type and an invalid value
namespace moodycamel{
	namespace details{
		template<typename thread_id_t>
		struct thread_id_converter{
			typedef thread_id_t thread_id_numeric_u_index;
			typedef thread_id_t thread_id_hash_t;
			static thread_id_hash_t prehash(thread_id_t const &x){return x;}
		};
	}
}
#if defined(MCDBGQ_USE_RELACY)
namespace moodycamel { namespace details {
	typedef std::uint32_t thread_id_t;
	static const thread_id_t invalid_thread_id  = 0xFFFFFFFFU;
	static const thread_id_t invalid_thread_id2 = 0xFFFFFFFEU;
	static inline thread_id_t thread_id() { return rl::thread_index(); }
} }
#elif defined(_WIN32)||defined(__WINDOWS__)||defined(__WIN32__)
// No sense pulling in windows.h in a header, we'll manually declare the function
// we use and rely on backwards-compatibility for this not to break
extern "C" __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId(void);
namespace moodycamel { namespace details {
	static_assert(sizeof(unsigned long) == sizeof(std::uint32_t), "Expected size of unsigned long to be 32 bits on Windows");
	typedef std::uint32_t thread_id_t;
	static const thread_id_t invalid_thread_id  = 0;			// See http://blogs.msdn.com/b/oldnewthing/archive/2004/02/23/78395.aspx
	static const thread_id_t invalid_thread_id2 = 0xFFFFFFFFU;	// Not technically guaranteed to be invalid, but is never used in practice. Note that all Win32 thread IDs are presently multiples of 4.
	static inline thread_id_t thread_id() { return static_cast<thread_id_t>(::GetCurrentThreadId()); }
} }
#elif defined(__arm__)||defined(_M_ARM)||defined(__aarch64__)||(defined(__APPLE__)&&TARGET_OS_IPHONE)||defined(__MVS__)||defined(MOODYCAMEL_NO_THREAD_LOCAL)
namespace moodycamel { namespace details {
	static_assert(sizeof(std::thread::id) == 4 || sizeof(std::thread::id) == 8, "std::thread::id is expected to be either 4 or 8 bytes");

	typedef std::thread::id thread_id_t;
	static const thread_id_t invalid_thread_id;         // Default ctor creates invalid ID

	// Note we don't define an invalid_thread_id2 since std::thread::id doesn't have one; it's
	// only used if MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is defined anyway, which it won't
	// be.
	static inline thread_id_t thread_id() { return std::this_thread::get_id(); }

	template<std::size_t> struct thread_id_size { };
	template<> struct thread_id_size<4> { typedef std::uint32_t numeric_t; };
	template<> struct thread_id_size<8> { typedef std::uint64_t numeric_t; };

	template<> struct thread_id_converter<thread_id_t> {
		typedef thread_id_size<sizeof(thread_id_t)>::numeric_t thread_id_numeric_u_index;
#ifndef __APPLE__
		typedef std::size_t thread_id_hash_t;
#else
		typedef thread_id_numeric_u_index thread_id_hash_t;
#endif

		static thread_id_hash_t prehash(thread_id_t const& x)
		{
#ifndef __APPLE__
			return std::hash<std::thread::id>()(x);
#else
			return *reinterpret_cast<thread_id_hash_t const*>(&x);
#endif
		}
	};
} }
#else
// Use a nice trick from this answer: http://stackoverflow.com/a/8438730/21475
// In order to get a numeric thread ID in a platform-independent way, we use a thread-local
// static variable's address as a thread identifier :-)
#if defined(__GNUC__)||defined(__INTEL_COMPILER)
#define MOODYCAMEL_THREADLOCAL __thread
#elif defined(_MSC_VER)
#define MOODYCAMEL_THREADLOCAL __declspec(thread)
#else
// Assume C++11 compliant compiler
#define MOODYCAMEL_THREADLOCAL thread_local
#endif
namespace moodycamel{
	namespace details{
		typedef std::uintptr_t thread_id_t;
		static const thread_id_t invalid_thread_id=0;        // Address can't be nullptr
		static const thread_id_t invalid_thread_id2=1;        // Member accesses off a null pointer are also generally invalid. Plus it's not aligned.
		inline thread_id_t thread_id(){
			static MOODYCAMEL_THREADLOCAL int x;
			return reinterpret_cast<thread_id_t>(&x);
		}
	}
}
#endif
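// Illustrative note on the fallback above (not part of the implementation): each thread
// gets its own thread-local `x`, so its address (and hence the returned ID) is unique
// among live threads. For example:
//     auto id = moodycamel::details::thread_id();  // distinct for each live thread
// An ID may be reused after its thread exits, since thread-local storage can be recycled.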

// Constexpr if
#ifndef MOODYCAMEL_CONSTEXPR_IF
#if (defined(_MSC_VER)&&defined(_HAS_CXX17)&&_HAS_CXX17)||__cplusplus>201402L
#define MOODYCAMEL_CONSTEXPR_IF if constexpr
#define MOODYCAMEL_MAYBE_UNUSED [[maybe_unused]]
#else
#define MOODYCAMEL_CONSTEXPR_IF if
#define MOODYCAMEL_MAYBE_UNUSED
#endif
#endif

// Exceptions
#ifndef MOODYCAMEL_EXCEPTIONS_ENABLED
#if (defined(_MSC_VER)&&defined(_CPPUNWIND))||(defined(__GNUC__)&&defined(__EXCEPTIONS))||(!defined(_MSC_VER)&&!defined(__GNUC__))
#define MOODYCAMEL_EXCEPTIONS_ENABLED
#endif
#endif
#ifdef MOODYCAMEL_EXCEPTIONS_ENABLED
#define MOODYCAMEL_TRY try
#define MOODYCAMEL_CATCH(...) catch(__VA_ARGS__)
#define MOODYCAMEL_RETHROW throw
#define MOODYCAMEL_THROW(expr) throw (expr)
#else
#define MOODYCAMEL_TRY MOODYCAMEL_CONSTEXPR_IF (true)
#define MOODYCAMEL_CATCH(...) else MOODYCAMEL_CONSTEXPR_IF (false)
#define MOODYCAMEL_RETHROW
#define MOODYCAMEL_THROW(expr)
#endif
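// Illustrative expansion (for exposition only): given
//     MOODYCAMEL_TRY { f(); } MOODYCAMEL_CATCH (...) { g(); }
// with exceptions enabled this is an ordinary try/catch, while with them disabled it becomes
//     if (true) { f(); } else if (false) { g(); }
// (or the `if constexpr` equivalent), so the catch body still compiles but is never executed.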
#ifndef MOODYCAMEL_NOEXCEPT
#if !defined(MOODYCAMEL_EXCEPTIONS_ENABLED)
#define MOODYCAMEL_NOEXCEPT
#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) true
#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) true
#elif defined(_MSC_VER)&&defined(_NOEXCEPT)&&_MSC_VER<1800
// VS2012's std::is_nothrow_[move_]constructible is broken and returns true when it shouldn't :-(
// We have to assume *all* non-trivial constructors may throw on VS2012!
#define MOODYCAMEL_NOEXCEPT _NOEXCEPT
#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference<valueType>::value && std::is_move_constructible<type>::value ? std::is_trivially_move_constructible<type>::value : std::is_trivially_copy_constructible<type>::value)
#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) ((std::is_rvalue_reference<valueType>::value && std::is_move_assignable<type>::value ? std::is_trivially_move_assignable<type>::value || std::is_nothrow_move_assignable<type>::value : std::is_trivially_copy_assignable<type>::value || std::is_nothrow_copy_assignable<type>::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr))
#elif defined(_MSC_VER)&&defined(_NOEXCEPT)&&_MSC_VER<1900
#define MOODYCAMEL_NOEXCEPT _NOEXCEPT
#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference<valueType>::value && std::is_move_constructible<type>::value ? std::is_trivially_move_constructible<type>::value || std::is_nothrow_move_constructible<type>::value : std::is_trivially_copy_constructible<type>::value || std::is_nothrow_copy_constructible<type>::value)
#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) ((std::is_rvalue_reference<valueType>::value && std::is_move_assignable<type>::value ? std::is_trivially_move_assignable<type>::value || std::is_nothrow_move_assignable<type>::value : std::is_trivially_copy_assignable<type>::value || std::is_nothrow_copy_assignable<type>::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr))
#else
#define MOODYCAMEL_NOEXCEPT noexcept
#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) noexcept(expr)
#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) noexcept(expr)
#endif
#endif
#ifndef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
#ifdef MCDBGQ_USE_RELACY
#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
#else
// VS2013 doesn't support `thread_local`, and MinGW-w64 w/ POSIX threading has a crippling bug: http://sourceforge.net/p/mingw-w64/bugs/445
// g++ <=4.7 doesn't support thread_local either.
// Finally, iOS/ARM doesn't have support for it either, and g++/ARM allows it to compile but it's unconfirmed to actually work
#if (!defined(_MSC_VER)||_MSC_VER>=1900)&&(!defined(__MINGW32__)&&!defined(__MINGW64__)||!defined(__WINPTHREADS_VERSION))&&(!defined(__GNUC__)||__GNUC__>4||(__GNUC__==4&&__GNUC_MINOR__>=8))&&(!defined(__APPLE__)||!TARGET_OS_IPHONE)&&!defined(__arm__)&&!defined(_M_ARM)&&!defined(__aarch64__)&&!defined(__MVS__)
// Assume `thread_local` is fully supported in all other C++11 compilers/platforms
#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED    // tentatively enabled for now; years ago several users reported having problems with it on some platforms
#endif
#endif
#endif

// VS2012 doesn't support deleted functions.
// In this case, we declare the function normally but don't define it. A link error will be generated if the function is called.
#ifndef MOODYCAMEL_DELETE_FUNCTION
#if defined(_MSC_VER)&&_MSC_VER<1800
#define MOODYCAMEL_DELETE_FUNCTION
#else
#define MOODYCAMEL_DELETE_FUNCTION = delete
#endif
#endif
namespace moodycamel{
	namespace details{
#ifndef MOODYCAMEL_ALIGNAS
// VS2013 doesn't support alignas or alignof, and align() requires a constant literal
#if defined(_MSC_VER)&&_MSC_VER<=1800
#define MOODYCAMEL_ALIGNAS(alignment) __declspec(align(alignment))
#define MOODYCAMEL_ALIGNOF(obj) __alignof(obj)
#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) typename details::Vs2013Aligned<std::alignment_of<obj>::value, T>::type
	template<int Align, typename T> struct Vs2013Aligned { };  // default, unsupported alignment
	template<typename T> struct Vs2013Aligned<1, T> { typedef __declspec(align(1)) T type; };
	template<typename T> struct Vs2013Aligned<2, T> { typedef __declspec(align(2)) T type; };
	template<typename T> struct Vs2013Aligned<4, T> { typedef __declspec(align(4)) T type; };
	template<typename T> struct Vs2013Aligned<8, T> { typedef __declspec(align(8)) T type; };
	template<typename T> struct Vs2013Aligned<16, T> { typedef __declspec(align(16)) T type; };
	template<typename T> struct Vs2013Aligned<32, T> { typedef __declspec(align(32)) T type; };
	template<typename T> struct Vs2013Aligned<64, T> { typedef __declspec(align(64)) T type; };
	template<typename T> struct Vs2013Aligned<128, T> { typedef __declspec(align(128)) T type; };
	template<typename T> struct Vs2013Aligned<256, T> { typedef __declspec(align(256)) T type; };
#else
		template<typename T>
		struct identity{typedef T type;};
#define MOODYCAMEL_ALIGNAS(alignment) alignas(alignment)
#define MOODYCAMEL_ALIGNOF(obj) alignof(obj)
#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) alignas(alignof(obj)) typename details::identity<T>::type
#endif
#endif
	}
}
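// Illustrative use of the macros above (an assumption mirroring how the queue uses them):
//     MOODYCAMEL_ALIGNED_TYPE_LIKE(char[sizeof(T)], T) storage;
// declares raw storage suitable for a T; on conforming compilers it expands to roughly
//     alignas(alignof(T)) char storage[sizeof(T)];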


// TSAN can falsely report races in lock-free code. To enable TSAN to be used from projects that use this one,
// we can apply per-function compile-time suppression.
// See https://clang.llvm.org/docs/ThreadSanitizer.html#has-feature-thread-sanitizer
#define MOODYCAMEL_NO_TSAN
#if defined(__has_feature)
 #if __has_feature(thread_sanitizer)
  #undef MOODYCAMEL_NO_TSAN
  #define MOODYCAMEL_NO_TSAN __attribute__((no_sanitize("thread")))
 #endif // TSAN
#endif // TSAN

// Compiler-specific likely/unlikely hints
namespace moodycamel{
	namespace details{
#if defined(__GNUC__)
		static inline bool (likely)(bool x) { return __builtin_expect((x), true); }
		static inline bool (unlikely)(bool x) { return __builtin_expect((x), false); }
#else
		static inline bool (likely)(bool x){return x;}
		static inline bool (unlikely)(bool x){return x;}
#endif
	}
}
#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
#include "internal/concurrentqueue_internal_debug.h"
#endif
namespace moodycamel{
	namespace details{
		template<typename T>
		struct const_numeric_max{
			static_assert(std::is_integral<T>::value, "const_numeric_max can only be used with integers");
			static const T value=std::numeric_limits<T>::is_signed?(static_cast<T>(1)<<(sizeof(T)*CHAR_BIT-1))-static_cast<T>(1):static_cast<T>(-1);
		};
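		// For example, const_numeric_max<std::uint32_t>::value is 0xFFFFFFFF, while
		// const_numeric_max<std::int32_t>::value is 0x7FFFFFFF (the sign bit is excluded).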
#if defined(__GLIBCXX__)
		typedef ::max_align_t std_max_align_t;      // libstdc++ forgot to add it to std:: for a while
#else
		typedef std::max_align_t std_max_align_t;   // Others (e.g. MSVC) insist it can *only* be accessed via std::
#endif
		// Some platforms have incorrectly set max_align_t to a type with <8 bytes alignment even while supporting
		// 8-byte aligned scalar values (*cough* 32-bit iOS). Work around this with our own union. See issue #64.
		typedef union{
			std_max_align_t x;
			long long y;
			void *z;
		}max_align_t;
	}
// Default traits for the ConcurrentQueue. To change some of the
// traits without re-implementing all of them, inherit from this
// struct and shadow the declarations you wish to be different;
// since the traits are used as a template type parameter, the
// shadowed declarations will be used where defined, and the defaults
// otherwise.
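// (An illustrative example of such a custom traits struct follows this definition.)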
	struct ConcurrentQueueDefaultTraits{
		// General-purpose size type. std::size_t is strongly recommended.
		typedef std::size_t u_index;
		// The type used for the enqueue and dequeue indices. Must be at least as
		// large as u_index. Should be significantly larger than the number of elements
		// you expect to hold at once, especially if you have a high turnover rate;
		// for example, on 32-bit x86, if you expect to have over a hundred million
		// elements or pump several million elements through your queue in a very
		// short space of time, using a 32-bit type *may* trigger a race condition.
		// A 64-bit int type is recommended in that case, and in practice will
		// prevent a race condition no matter the usage of the queue. Note that
		// whether the queue is lock-free with a 64-bit type depends on whether
		// std::atomic<std::uint64_t> is lock-free, which is platform-specific.
		typedef std::size_t index_t;
		// Internally, all elements are enqueued and dequeued from multi-element
		// blocks; this is the smallest controllable unit. If you expect few elements
		// but many producers, a smaller block size should be favoured. For few producers
		// and/or many elements, a larger block size is preferred. A sane default
		// is provided. Must be a power of 2.
		static const u_index BLOCK_SIZE=32;
		// For explicit producers (i.e. when using a producer token), the block is
		// checked for being empty by iterating through a list of flags, one per element.
		// For large block sizes, this is too inefficient, and switching to an atomic
		// counter-based approach is faster. The switch is made for block sizes strictly
		// larger than this threshold.
		static const u_index EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD=32;
		// How many full blocks can be expected for a single explicit producer? This should
		// reflect that number's maximum for optimal performance. Must be a power of 2.
		static const u_index EXPLICIT_INITIAL_INDEX_SIZE=32;
		// How many full blocks can be expected for a single implicit producer? This should
		// reflect that number's maximum for optimal performance. Must be a power of 2.
		static const u_index IMPLICIT_INITIAL_INDEX_SIZE=32;
		// The initial size of the hash table mapping thread IDs to implicit producers.
		// Note that the hash is resized every time it becomes half full.
		// Must be a power of two, and either 0 or at least 1. If 0, implicit production
		// (using the enqueue methods without an explicit producer token) is disabled.
		static const u_index INITIAL_IMPLICIT_PRODUCER_HASH_SIZE=32;
		// Controls the number of items that an explicit consumer (i.e. one with a token)
		// must consume before it causes all consumers to rotate and move on to the next
		// internal queue.
		static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE=256;
		// The maximum number of elements (inclusive) that can be enqueued to a sub-queue.
		// Enqueue operations that would cause this limit to be surpassed will fail. Note
		// that this limit is enforced at the block level (for performance reasons), i.e.
		// it's rounded up to the nearest block size.
		static const u_index MAX_SUBQUEUE_SIZE=details::const_numeric_max<u_index>::value;
		// The number of times to spin before sleeping when waiting on a semaphore.
		// Recommended values are on the order of 1000-10000 unless the number of
		// consumer threads exceeds the number of idle cores (in which case try 0-100).
		// Only affects instances of the BlockingConcurrentQueue.
		static const int MAX_SEMA_SPINS=10000;
		// Whether to recycle dynamically-allocated blocks into an internal free list or
		// not. If false, only pre-allocated blocks (controlled by the constructor
		// arguments) will be recycled, and all others will be `free`d back to the heap.
		// Note that blocks consumed by explicit producers are only freed on destruction
		// of the queue (not following destruction of the token) regardless of this trait.
		static const bool RECYCLE_ALLOCATED_BLOCKS=false;
#ifndef MCDBGQ_USE_RELACY
		// Memory allocation can be customized if needed.
		// malloc should return nullptr on failure, and handle alignment like std::malloc.
#if defined(malloc)||defined(free)
		// Gah, this is 2015, stop defining macros that break standard code already!
		// Work around malloc/free being special macros:
		static inline void* WORKAROUND_malloc(u_index size) { return malloc(size); }
		static inline void WORKAROUND_free(void* paramRegPtr) { return free(paramRegPtr); }
		static inline void* (malloc)(u_index size) { return WORKAROUND_malloc(size); }
		static inline void (free)(void* paramRegPtr) { return WORKAROUND_free(paramRegPtr); }
#else
		static inline void *malloc(u_index size){return std::malloc(size);}
		static inline void free(void *paramRegPtr){return std::free(paramRegPtr);}
#endif
#else
		// Debug versions when running under the Relacy race detector (ignore
		// these in user code)
		static inline void* malloc(u_index size) { return rl::rl_malloc(size, $); }
		static inline void free(void* paramRegPtr) { return rl::rl_free(paramRegPtr, $); }
#endif
	};
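// Illustrative sketch (assumed typical usage, not part of the queue itself): to change a
// single trait, inherit from the defaults and shadow just the member in question, e.g.
//
//     struct BigBlockTraits : public moodycamel::ConcurrentQueueDefaultTraits {
//         static const u_index BLOCK_SIZE = 256;  // must remain a power of 2
//     };
//     moodycamel::ConcurrentQueue<int, BigBlockTraits> q;
//
// `BigBlockTraits` is a hypothetical name used here purely for illustration.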
// When producing or consuming many elements, the most efficient way is to:
//    1) Use one of the bulk-operation methods of the queue with a token
//    2) Failing that, use the bulk-operation methods without a token
//    3) Failing that, create a token and use that with the single-item methods
//    4) Failing that, use the single-parameter methods of the queue
// Having said that, don't create tokens willy-nilly -- ideally there should be
// a maximum of one token per thread (of each kind).
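// Illustrative sketch of the recommended fast path (assumed typical usage, not part of
// the implementation):
//
//     moodycamel::ConcurrentQueue<int> q;
//     moodycamel::ProducerToken ptok(q);                   // ideally one per producing thread
//     int items[32] = {0};                                 // fill with real data
//     q.enqueue_bulk(ptok, items, 32);                     // 1) bulk methods with a token
//
//     moodycamel::ConsumerToken ctok(q);                   // ideally one per consuming thread
//     int out[32];
//     std::size_t n = q.try_dequeue_bulk(ctok, out, 32);   // may legitimately return < 32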
	struct ProducerToken;
	struct ConsumerToken;
	template<typename T, typename Traits>
	class ConcurrentQueue;
	template<typename T, typename Traits>
	class BlockingConcurrentQueue;
	class ConcurrentQueueTests;
	namespace details{
		struct ConcurrentQueueProducerTypelessBase{
			ConcurrentQueueProducerTypelessBase *next;
			std::atomic<bool> inactive;
			ProducerToken *token;
			ConcurrentQueueProducerTypelessBase(): next(nullptr), inactive(false), token(nullptr){
			}
		};
		template<bool use32>
		struct _hash_32_or_64{
			static inline std::uint32_t hash(std::uint32_t h){
				// MurmurHash3 finalizer -- see https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
				// Since the thread ID is already unique, all we really want to do is propagate that
				// uniqueness evenly across all the bits, so that we can use a subset of the bits while
				// reducing collisions significantly
				h^=h>>16;
				h*=0x85ebca6b;
				h^=h>>13;
				h*=0xc2b2ae35;
				return h^(h>>16);
			}
		};
		template<>
		struct _hash_32_or_64<1>{
			static inline std::uint64_t hash(std::uint64_t h){
				h^=h>>33;
				h*=0xff51afd7ed558ccd;
				h^=h>>33;
				h*=0xc4ceb9fe1a85ec53;
				return h^(h>>33);
			}
		};
		template<std::size_t size>
		struct hash_32_or_64 : public _hash_32_or_64<(size>4)>{};
		static inline std::size_t hash_thread_id(thread_id_t id){
			static_assert(sizeof(thread_id_t)<=8, "Expected a platform where thread IDs are at most 64-bit values");
			return static_cast<std::size_t>(hash_32_or_64<sizeof(thread_id_converter<thread_id_t>::thread_id_hash_t)>::hash(thread_id_converter<thread_id_t>::prehash(id)));
		}
		template<typename T>
		static inline bool circular_less_than(T a, T b){
			static_assert(std::is_integral<T>::value&&!std::numeric_limits<T>::is_signed, "circular_less_than is intended to be used only with unsigned integer types");
			return static_cast<T>(a-b)>static_cast<T>(static_cast<T>(1)<<(static_cast<T>(sizeof(T)*CHAR_BIT-1)));
			// Note: extra parens around rhs of operator<< is MSVC bug: https://developercommunity2.visualstudio.com/t/C4554-triggers-when-both-lhs-and-rhs-is/10034931
			//       silencing the bug requires #pragma warning(disable: 4554) around the calling code and has no effect when done here.
		}
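		// For example, circular_less_than<std::uint8_t>(250, 4) is true: 4 is "ahead of" 250
		// once the 8-bit index wraps around, because static_cast<std::uint8_t>(250-4)==246>128.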
		template<typename U>
		static inline char *align_for(char *paramRegPtr){
			const std::size_t alignment=std::alignment_of<U>::value;
			return paramRegPtr+(alignment-(reinterpret_cast<std::uintptr_t>(paramRegPtr)%alignment))%alignment;
		}
		template<typename T>
		static inline T ceil_to_pow_2(T x){
			static_assert(std::is_integral<T>::value&&!std::numeric_limits<T>::is_signed, "ceil_to_pow_2 is intended to be used only with unsigned integer types");

			// Adapted from http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
			--x;
			x|=x>>1;
			x|=x>>2;
			x|=x>>4;
			for(std::size_t i=1;i<sizeof(T);i<<=1){
				x|=x>>(i<<3);
			}
			++x;
			return x;
		}
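		// For example, ceil_to_pow_2<std::uint32_t>(5) == 8 and ceil_to_pow_2<std::uint32_t>(64) == 64;
		// the loop extends the fixed shift cascade above to cover 16-, 32- and 64-bit types.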
		template<typename T>
		static inline void swap_relaxed(std::atomic <T> &left, std::atomic <T> &right){
			T temp=std::move(left.load(std::memory_order_relaxed));
			left.store(std::move(right.load(std::memory_order_relaxed)), std::memory_order_relaxed);
			right.store(std::move(temp), std::memory_order_relaxed);
		}
		template<typename T>
		static inline T const &nomove(T const &x){
			return x;
		}
		template<bool Enable>
		struct nomove_if{
			template<typename T>
			static inline T const &eval(T const &x){
				return x;
			}
		};
		template<>
		struct nomove_if<false>{
			template<typename U>
			static inline auto eval(U &&x)->decltype(std::forward<U>(x)){
				return std::forward<U>(x);
			}
		};
		template<typename It>
		static inline auto deref_noexcept(It &it) MOODYCAMEL_NOEXCEPT->decltype(*it){
			return *it;
		}
#if defined(__clang__)||!defined(__GNUC__)||__GNUC__>4||(__GNUC__==4&&__GNUC_MINOR__>=8)
		template<typename T>
		struct is_trivially_destructible : std::is_trivially_destructible<T>{};
#else
		template<typename T> struct is_trivially_destructible : std::has_trivial_destructor<T> { };
#endif
#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
#ifdef MCDBGQ_USE_RELACY
		typedef RelacyThreadExitListener ThreadExitListener;
		typedef RelacyThreadExitNotifier ThreadExitNotifier;
#else
		class ThreadExitNotifier;
		struct ThreadExitListener{
			typedef void (*callback_t)(void *);
			callback_t callback;
			void *userData;
			ThreadExitListener *next;        // reserved for use by the ThreadExitNotifier
			ThreadExitNotifier *chain;        // reserved for use by the ThreadExitNotifier
		};
		class ThreadExitNotifier{
				public:
				static void subscribe(ThreadExitListener *listener){
					auto &tlsInst=instance();
					std::lock_guard <std::mutex> guard(mutex());
					listener->next=tlsInst.tail;
					listener->chain=&tlsInst;
					tlsInst.tail=listener;
				}
				static void unsubscribe(ThreadExitListener *listener){
					std::lock_guard <std::mutex> guard(mutex());
					if(!listener->chain){
						return;  // race with ~ThreadExitNotifier
					}
					auto &tlsInst=*listener->chain;
					listener->chain=nullptr;
					ThreadExitListener **prev=&tlsInst.tail;
					for(auto paramRegPtr=tlsInst.tail;paramRegPtr!=nullptr;paramRegPtr=paramRegPtr->next){
						if(paramRegPtr==listener){
							*prev=paramRegPtr->next;
							break;
						}
						prev=&paramRegPtr->next;
					}
				}
				private:
				ThreadExitNotifier(): tail(nullptr){}
				ThreadExitNotifier(ThreadExitNotifier const &) MOODYCAMEL_DELETE_FUNCTION;
				ThreadExitNotifier &operator=(ThreadExitNotifier const &) MOODYCAMEL_DELETE_FUNCTION;
				~ThreadExitNotifier(){
					// This thread is about to exit, let everyone know!
					assert(this==&instance()&&
						   "If this assert fails, you likely have a buggy compiler! Change the preprocessor conditions such that MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is no longer defined.");
					std::lock_guard <std::mutex> guard(mutex());
					for(auto paramRegPtr=tail;paramRegPtr!=nullptr;paramRegPtr=paramRegPtr->next){
						paramRegPtr->chain=nullptr;
						paramRegPtr->callback(paramRegPtr->userData);
					}
				}
				// Thread-local
				static inline ThreadExitNotifier &instance(){
					static thread_local ThreadExitNotifier notifier;
					return notifier;
				}
				static inline std::mutex &mutex(){
					// Must be static because the ThreadExitNotifier could be destroyed while unsubscribe is called
					static std::mutex mutex;
					return mutex;
				}
				private:
				ThreadExitListener *tail;
		};
#endif
#endif
		template<typename T>
		struct static_is_lock_free_num{enum{value=0};};
		template<>
		struct static_is_lock_free_num<signed char>{enum{value=ATOMIC_CHAR_LOCK_FREE};};
		template<>
		struct static_is_lock_free_num<short>{enum{value=ATOMIC_SHORT_LOCK_FREE};};
		template<>
		struct static_is_lock_free_num<int>{enum{value=ATOMIC_INT_LOCK_FREE};};
		template<>
		struct static_is_lock_free_num<long>{enum{value=ATOMIC_LONG_LOCK_FREE};};
		template<>
		struct static_is_lock_free_num<long long>{enum{value=ATOMIC_LLONG_LOCK_FREE};};
		template<typename T>
		struct static_is_lock_free : static_is_lock_free_num<typename std::make_signed<T>::type>{};
		template<>
		struct static_is_lock_free<bool>{enum{value=ATOMIC_BOOL_LOCK_FREE};};
		template<typename U>
		struct static_is_lock_free<U *>{enum{value=ATOMIC_POINTER_LOCK_FREE};};
	}
	struct ProducerToken{
		template<typename T, typename Traits>
		explicit ProducerToken(ConcurrentQueue<T, Traits> &queue);
		template<typename T, typename Traits>
		explicit ProducerToken(BlockingConcurrentQueue<T, Traits> &queue);
		ProducerToken(ProducerToken &&other) MOODYCAMEL_NOEXCEPT: producer(other.producer){
			other.producer=nullptr;
			if(producer!=nullptr){
				producer->token=this;
			}
		}
		inline ProducerToken &operator=(ProducerToken &&other) MOODYCAMEL_NOEXCEPT{
			swap(other);
			return *this;
		}
		void swap(ProducerToken &other) MOODYCAMEL_NOEXCEPT{
			std::swap(producer, other.producer);
			if(producer!=nullptr){
				producer->token=this;
			}
			if(other.producer!=nullptr){
				other.producer->token=&other;
			}
		}
		// A token is always valid unless:
		//     1) Memory allocation failed during construction
		//     2) It was moved via the move constructor
		//        (Note: assignment does a swap, leaving both potentially valid)
		//     3) The associated queue was destroyed
		// Note that if valid() returns true, that only indicates
		// that the token is valid for use with a specific queue,
		// but not which one; that's up to the user to track.
		inline bool valid() const{return producer!=nullptr;}
		~ProducerToken(){
			if(producer!=nullptr){
				producer->token=nullptr;
				producer->inactive.store(true, std::memory_order_release);
			}
		}
		// Disable copying and assignment
		ProducerToken(ProducerToken const &) MOODYCAMEL_DELETE_FUNCTION;
		ProducerToken &operator=(ProducerToken const &) MOODYCAMEL_DELETE_FUNCTION;
	private:
		template<typename T, typename Traits> friend
		class ConcurrentQueue;
		friend class ConcurrentQueueTests;
	protected:
		details::ConcurrentQueueProducerTypelessBase *producer;
	};
	struct ConsumerToken{
		template<typename T, typename Traits>
		explicit ConsumerToken(ConcurrentQueue<T, Traits> &q);
		template<typename T, typename Traits>
		explicit ConsumerToken(BlockingConcurrentQueue<T, Traits> &q);
		ConsumerToken(ConsumerToken &&other) MOODYCAMEL_NOEXCEPT: initialOffset(other.initialOffset),
																  lastKnownGlobalOffset(other.lastKnownGlobalOffset),
																  itemsConsumedFromCurrent(other.itemsConsumedFromCurrent),
																  currentProducer(other.currentProducer),
																  desiredProducer(other.desiredProducer){
		}
		inline ConsumerToken &operator=(ConsumerToken &&other) MOODYCAMEL_NOEXCEPT{
			swap(other);
			return *this;
		}
		void swap(ConsumerToken &other) MOODYCAMEL_NOEXCEPT{
			std::swap(initialOffset, other.initialOffset);
			std::swap(lastKnownGlobalOffset, other.lastKnownGlobalOffset);
			std::swap(itemsConsumedFromCurrent, other.itemsConsumedFromCurrent);
			std::swap(currentProducer, other.currentProducer);
			std::swap(desiredProducer, other.desiredProducer);
		}
		// Disable copying and assignment
		ConsumerToken(ConsumerToken const &) MOODYCAMEL_DELETE_FUNCTION;
		ConsumerToken &operator=(ConsumerToken const &) MOODYCAMEL_DELETE_FUNCTION;
	private:
		template<typename T, typename Traits> friend
		class ConcurrentQueue;
		friend class ConcurrentQueueTests;
	private: // but shared with ConcurrentQueue
		std::uint32_t initialOffset;
		std::uint32_t lastKnownGlobalOffset;
		std::uint32_t itemsConsumedFromCurrent;
		details::ConcurrentQueueProducerTypelessBase *currentProducer;
		details::ConcurrentQueueProducerTypelessBase *desiredProducer;
	};
// Need to forward-declare this swap because it's in a namespace.
// See http://stackoverflow.com/questions/4492062/why-does-a-c-friend-class-need-a-forward-declaration-only-in-other-namespaces
	template<typename T, typename Traits>
	inline void swap(typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP &a, typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP &b) MOODYCAMEL_NOEXCEPT;
	template<typename T, typename Traits = ConcurrentQueueDefaultTraits> class ConcurrentQueue{
			public:
			typedef ::moodycamel::ProducerToken producer_token_t;
			typedef ::moodycamel::ConsumerToken consumer_token_t;
			typedef typename Traits::index_t index_t;
			typedef typename Traits::u_index u_index;
			static const u_index BLOCK_SIZE=static_cast<u_index>(Traits::BLOCK_SIZE);
			static const u_index EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD=static_cast<u_index>(Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD);
			static const u_index EXPLICIT_INITIAL_INDEX_SIZE=static_cast<u_index>(Traits::EXPLICIT_INITIAL_INDEX_SIZE);
			static const u_index IMPLICIT_INITIAL_INDEX_SIZE=static_cast<u_index>(Traits::IMPLICIT_INITIAL_INDEX_SIZE);
			static const u_index INITIAL_IMPLICIT_PRODUCER_HASH_SIZE=static_cast<u_index>(Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE);
			static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE=static_cast<std::uint32_t>(Traits::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE);
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable: 4307)		// + integral constant overflow (that's what the ternary expression is for!)
#pragma warning(disable: 4309)		// static_cast: Truncation of constant value
#endif
			static const u_index MAX_SUBQUEUE_SIZE=(details::const_numeric_max<u_index>::value-static_cast<u_index>(Traits::MAX_SUBQUEUE_SIZE)<BLOCK_SIZE)?details::const_numeric_max<u_index>::value:((static_cast<u_index>(Traits::MAX_SUBQUEUE_SIZE)+(BLOCK_SIZE-1))/BLOCK_SIZE*BLOCK_SIZE);
#ifdef _MSC_VER
#pragma warning(pop)
#endif
			static_assert(!std::numeric_limits<u_index>::is_signed&&std::is_integral<u_index>::value, "Traits::u_index must be an unsigned integral type");
			static_assert(!std::numeric_limits<index_t>::is_signed&&std::is_integral<index_t>::value, "Traits::index_t must be an unsigned integral type");
			static_assert(sizeof(index_t)>=sizeof(u_index), "Traits::index_t must be at least as wide as Traits::u_index");
			static_assert((BLOCK_SIZE>1)&&!(BLOCK_SIZE&(BLOCK_SIZE-1)), "Traits::BLOCK_SIZE must be a power of 2 (and at least 2)");
			static_assert((EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD>1)&&!(EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD&(EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD-1)), "Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD must be a power of 2 (and greater than 1)");
			static_assert((EXPLICIT_INITIAL_INDEX_SIZE>1)&&!(EXPLICIT_INITIAL_INDEX_SIZE&(EXPLICIT_INITIAL_INDEX_SIZE-1)), "Traits::EXPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)");
			static_assert((IMPLICIT_INITIAL_INDEX_SIZE>1)&&!(IMPLICIT_INITIAL_INDEX_SIZE&(IMPLICIT_INITIAL_INDEX_SIZE-1)), "Traits::IMPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)");
			static_assert((INITIAL_IMPLICIT_PRODUCER_HASH_SIZE==0)||!(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE&(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE-1)), "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be a power of 2");
			static_assert(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE==0||INITIAL_IMPLICIT_PRODUCER_HASH_SIZE>=1, "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be at least 1 (or 0 to disable implicit enqueueing)");
			public:
			// Creates a queue with at least `capacity` element slots; note that the
			// actual number of elements that can be inserted without additional memory
			// allocation depends on the number of producers and the block size (e.g. if
			// the block size is equal to `capacity`, only a single block will be allocated
			// up-front, which means only a single producer will be able to enqueue elements
			// without an extra allocation -- blocks aren't shared between producers).
			// This method is not thread safe -- it is up to the user to ensure that the
			// queue is fully constructed before it starts being used by other threads (this
			// includes making the memory effects of construction visible, possibly with a
			// memory barrier).
			explicit ConcurrentQueue(u_index capacity=32*BLOCK_SIZE): producerListTail(nullptr), producerCount(0), initialBlockPoolIndex(0), nextExplicitConsumerId(0), globalExplicitConsumerOffset(0){
				implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
				populate_initial_implicit_producer_hash();
				populate_initial_block_list(capacity/BLOCK_SIZE+((capacity&(BLOCK_SIZE-1))==0?0:1));
#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
				// Track all the producers using a fully-resolved typed list for
				// each kind; this makes it possible to debug them starting from
				// the root queue object (otherwise wacky casts are needed that
				// don't compile in the debugger's expression evaluator).
				explicitProducers.store(nullptr, std::memory_order_relaxed);
				implicitProducers.store(nullptr, std::memory_order_relaxed);
#endif
			}
			// Computes the correct amount of pre-allocated blocks for you based
			// on the minimum number of elements you want available at any given
			// time, and the maximum concurrent number of each type of producer.
			ConcurrentQueue(u_index minCapacity, u_index maxExplicitProducers, u_index maxImplicitProducers): producerListTail(nullptr), producerCount(0), initialBlockPoolIndex(0), nextExplicitConsumerId(0), globalExplicitConsumerOffset(0){
				implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
				populate_initial_implicit_producer_hash();
				u_index blocks=(((minCapacity+BLOCK_SIZE-1)/BLOCK_SIZE)-1)*(maxExplicitProducers+1)+2*(maxExplicitProducers+maxImplicitProducers);
				populate_initial_block_list(blocks);
#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
				explicitProducers.store(nullptr, std::memory_order_relaxed);
				implicitProducers.store(nullptr, std::memory_order_relaxed);
#endif
			}
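			// Illustrative sketch (assumed typical usage): pre-allocate for at least 1024 elements
			// in flight with up to 2 explicit and 8 implicit producers; `Task` is a hypothetical type:
			//     moodycamel::ConcurrentQueue<Task> q(1024, 2, 8);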
			// Note: The queue should not be accessed concurrently while it's
			// being deleted. It's up to the user to synchronize this.
			// This method is not thread safe.
			~ConcurrentQueue(){
				// Destroy producers
				auto paramRegPtr=producerListTail.load(std::memory_order_relaxed);
				while(paramRegPtr!=nullptr){
					auto next=paramRegPtr->next_prod();
					if(paramRegPtr->token!=nullptr){
						paramRegPtr->token->producer=nullptr;
					}
					destroy(paramRegPtr);
					paramRegPtr=next;
				}

				// Destroy implicit producer hash tables
				MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE!=0){
					auto hash=implicitProducerHash.load(std::memory_order_relaxed);
					while(hash!=nullptr){
						auto prev=hash->prev;
						if(prev!=nullptr){        // The last hash is part of this object and was not allocated dynamically
							for(u_index i=0;i!=hash->capacity;++i){
								hash->entries[i].~ImplicitProducerKVP();
							}
							hash->~ImplicitProducerHash();
							(Traits::free)(hash);
						}
						hash=prev;
					}
				}

				// Destroy global free list
				auto block=freeList.head_unsafe();
				while(block!=nullptr){
					auto next=block->freeListNext.load(std::memory_order_relaxed);
					if(block->dynamicallyAllocated){
						destroy(block);
					}
					block=next;
				}

				// Destroy initial free list
				destroy_array(initialBlockPool, initialBlockPoolSize);
			}
			// Disable copying and copy assignment
			ConcurrentQueue(ConcurrentQueue const &) MOODYCAMEL_DELETE_FUNCTION;
			ConcurrentQueue &operator=(ConcurrentQueue const &) MOODYCAMEL_DELETE_FUNCTION;
			// Moving is supported, but note that it is *not* a thread-safe operation.
			// Nobody can use the queue while it's being moved, and the memory effects
			// of that move must be propagated to other threads before they can use it.
			// Note: When a queue is moved, its tokens are still valid but can only be
			// used with the destination queue (i.e. semantically they are moved along
			// with the queue itself).
			ConcurrentQueue(ConcurrentQueue &&other) MOODYCAMEL_NOEXCEPT: producerListTail(other.producerListTail.load(std::memory_order_relaxed)), producerCount(other.producerCount.load(std::memory_order_relaxed)), initialBlockPoolIndex(other.initialBlockPoolIndex.load(std::memory_order_relaxed)), initialBlockPool(other.initialBlockPool), initialBlockPoolSize(other.initialBlockPoolSize), freeList(std::move(other.freeList)), nextExplicitConsumerId(other.nextExplicitConsumerId.load(std::memory_order_relaxed)), globalExplicitConsumerOffset(other.globalExplicitConsumerOffset.load(std::memory_order_relaxed)){
				// Move the other one into this, and leave the other one as an empty queue
				implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
				populate_initial_implicit_producer_hash();
				swap_implicit_producer_hashes(other);
				other.producerListTail.store(nullptr, std::memory_order_relaxed);
				other.producerCount.store(0, std::memory_order_relaxed);
				other.nextExplicitConsumerId.store(0, std::memory_order_relaxed);
				other.globalExplicitConsumerOffset.store(0, std::memory_order_relaxed);
#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
				explicitProducers.store(other.explicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed);
				other.explicitProducers.store(nullptr, std::memory_order_relaxed);
				implicitProducers.store(other.implicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed);
				other.implicitProducers.store(nullptr, std::memory_order_relaxed);
#endif
				other.initialBlockPoolIndex.store(0, std::memory_order_relaxed);
				other.initialBlockPoolSize=0;
				other.initialBlockPool=nullptr;
				reown_producers();
			}
			inline ConcurrentQueue &operator=(ConcurrentQueue &&other) MOODYCAMEL_NOEXCEPT{
				return swap_internal(other);
			}
			// Swaps this queue's state with the other's. Not thread-safe.
			// Swapping two queues does not invalidate their tokens, however
			// the tokens that were created for one queue must be used with
			// only the swapped queue (i.e. the tokens are tied to the
			// queue's movable state, not the object itself).
			inline void swap(ConcurrentQueue &other) MOODYCAMEL_NOEXCEPT{
				swap_internal(other);
			}
			private:
			ConcurrentQueue &swap_internal(ConcurrentQueue &other){
				if(this==&other){
					return *this;
				}
				details::swap_relaxed(producerListTail, other.producerListTail);
				details::swap_relaxed(producerCount, other.producerCount);
				details::swap_relaxed(initialBlockPoolIndex, other.initialBlockPoolIndex);
				std::swap(initialBlockPool, other.initialBlockPool);
				std::swap(initialBlockPoolSize, other.initialBlockPoolSize);
				freeList.swap(other.freeList);
				details::swap_relaxed(nextExplicitConsumerId, other.nextExplicitConsumerId);
				details::swap_relaxed(globalExplicitConsumerOffset, other.globalExplicitConsumerOffset);
				swap_implicit_producer_hashes(other);
				reown_producers();
				other.reown_producers();
#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
				details::swap_relaxed(explicitProducers, other.explicitProducers);
				details::swap_relaxed(implicitProducers, other.implicitProducers);
#endif
				return *this;
			}
			public:
			// Enqueues a single item (by copying it).
			// Allocates memory if required. Only fails if memory allocation fails (or implicit
			// production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0,
			// or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
			// Thread-safe.
			inline bool enqueue(T const &item){
				MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE==0) return false;else return inner_enqueue<CanAlloc>(item);
			}
			// Enqueues a single item (by moving it, if possible).
			// Allocates memory if required. Only fails if memory allocation fails (or implicit
			// production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0,
			// or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
			// Thread-safe.
			inline bool enqueue(T &&item){
				MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE==0) return false;else return inner_enqueue<CanAlloc>(std::move(item));
			}
			// Enqueues a single item (by copying it) using an explicit producer token.
			// Allocates memory if required. Only fails if memory allocation fails (or
			// Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
			// Thread-safe.
			inline bool enqueue(producer_token_t const &token, T const &item){
				return inner_enqueue<CanAlloc>(token, item);
			}
			// Enqueues a single item (by moving it, if possible) using an explicit producer token.
			// Allocates memory if required. Only fails if memory allocation fails (or
			// Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
			// Thread-safe.
			inline bool enqueue(producer_token_t const &token, T &&item){
				return inner_enqueue<CanAlloc>(token, std::move(item));
			}
			// Enqueues several items.
			// Allocates memory if required. Only fails if memory allocation fails (or
			// implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE
			// is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
			// Note: Use std::make_move_iterator if the elements should be moved instead of copied.
			// Thread-safe.
			template<typename It>
			bool enqueue_bulk(It itemFirst, u_index count){
				MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE==0) return false;else return inner_enqueue_bulk<CanAlloc>(itemFirst, count);
			}
			// Enqueues several items using an explicit producer token.
			// Allocates memory if required. Only fails if memory allocation fails
			// (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
			// Note: Use std::make_move_iterator if the elements should be moved
			// instead of copied.
			// Thread-safe.
			template<typename It>
			bool enqueue_bulk(producer_token_t const &token, It itemFirst, u_index count){
				return inner_enqueue_bulk<CanAlloc>(token, itemFirst, count);
			}
			// Enqueues a single item (by copying it).
			// Does not allocate memory. Fails if not enough room to enqueue (or implicit
			// production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE
			// is 0).
			// Thread-safe.
			inline bool try_enqueue(T const &item){
				MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE==0) return false;else return inner_enqueue<CannotAlloc>(item);
			}
			// Enqueues a single item (by moving it, if possible).
			// Does not allocate memory (except for one-time implicit producer).
			// Fails if not enough room to enqueue (or implicit production is
			// disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0).
			// Thread-safe.
			inline bool try_enqueue(T &&item){
				MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE==0) return false;else return inner_enqueue<CannotAlloc>(std::move(item));
			}
			// Enqueues a single item (by copying it) using an explicit producer token.
			// Does not allocate memory. Fails if not enough room to enqueue.
			// Thread-safe.
			inline bool try_enqueue(producer_token_t const &token, T const &item){
				return inner_enqueue<CannotAlloc>(token, item);
			}
			// Enqueues a single item (by moving it, if possible) using an explicit producer token.
			// Does not allocate memory. Fails if not enough room to enqueue.
			// Thread-safe.
			inline bool try_enqueue(producer_token_t const &token, T &&item){
				return inner_enqueue<CannotAlloc>(token, std::move(item));
			}
			// Enqueues several items.
			// Does not allocate memory (except for one-time implicit producer).
			// Fails if not enough room to enqueue (or implicit production is
			// disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0).
			// Note: Use std::make_move_iterator if the elements should be moved
			// instead of copied.
			// Thread-safe.
			template<typename It>
			bool try_enqueue_bulk(It itemFirst, u_index count){
				MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE==0) return false;else return inner_enqueue_bulk<CannotAlloc>(itemFirst, count);
			}
			// Enqueues several items using an explicit producer token.
			// Does not allocate memory. Fails if not enough room to enqueue.
			// Note: Use std::make_move_iterator if the elements should be moved
			// instead of copied.
			// Thread-safe.
			template<typename It>
			bool try_enqueue_bulk(producer_token_t const &token, It itemFirst, u_index count){
				return inner_enqueue_bulk<CannotAlloc>(token, itemFirst, count);
			}
			// Attempts to dequeue from the queue.
			// Returns false if all producer streams appeared empty at the time they
			// were checked (so, the queue is likely but not guaranteed to be empty).
			// Never allocates. Thread-safe.
			template<typename U>
			bool try_dequeue(U &item){
				// Instead of simply trying each producer in turn (which could cause needless contention on the first
				// producer), we score them heuristically.
				u_index nonEmptyCount=0;
				ProducerBase *best=nullptr;
				u_index bestSize=0;
				for(auto paramRegPtr=producerListTail.load(std::memory_order_acquire);nonEmptyCount<3&&paramRegPtr!=nullptr;paramRegPtr=paramRegPtr->next_prod()){
					auto size=paramRegPtr->size_approx();
					if(size>0){
						if(size>bestSize){
							bestSize=size;
							best=paramRegPtr;
						}
						++nonEmptyCount;
					}
				}

				// If there was at least one non-empty queue but it appears empty at the time
				// we try to dequeue from it, we need to make sure every queue's been tried
				if(nonEmptyCount>0){
					if((details::likely)(best->dequeue(item))){
						return true;
					}
					for(auto paramRegPtr=producerListTail.load(std::memory_order_acquire);paramRegPtr!=nullptr;paramRegPtr=paramRegPtr->next_prod()){
						if(paramRegPtr!=best&&paramRegPtr->dequeue(item)){
							return true;
						}
					}
				}
				return false;
			}
			// Attempts to dequeue from the queue.
			// Returns false if all producer streams appeared empty at the time they
			// were checked (so, the queue is likely but not guaranteed to be empty).
			// This differs from the try_dequeue(item) method in that this one does
			// not attempt to reduce contention by interleaving the order that producer
			// streams are dequeued from. So, using this method can reduce overall throughput
			// under contention, but will give more predictable results in single-threaded
			// consumer scenarios. This is mostly only useful for internal unit tests.
			// Never allocates. Thread-safe.
			template<typename U>
			bool try_dequeue_non_interleaved(U &item){
				for(auto paramRegPtr=producerListTail.load(std::memory_order_acquire);paramRegPtr!=nullptr;paramRegPtr=paramRegPtr->next_prod()){
					if(paramRegPtr->dequeue(item)){
						return true;
					}
				}
				return false;
			}
			// Attempts to dequeue from the queue using an explicit consumer token.
			// Returns false if all producer streams appeared empty at the time they
			// were checked (so, the queue is likely but not guaranteed to be empty).
			// Never allocates. Thread-safe.
			template<typename U>
			bool try_dequeue(consumer_token_t &token, U &item){
				// The idea is roughly as follows:
				// Every 256 items from one producer, make everyone rotate (increase the global offset) -> this means the highest efficiency consumer dictates the rotation speed of everyone else, more or less
				// If you see that the global offset has changed, you must reset your consumption counter and move to your designated place
				// If there's no items where you're supposed to be, keep moving until you find a producer with some items
				// If the global offset has not changed but you've run out of items to consume, move over from your current position until you find a producer with something in it

				if(token.desiredProducer==nullptr||token.lastKnownGlobalOffset!=globalExplicitConsumerOffset.load(std::memory_order_relaxed)){
					if(!update_current_producer_after_rotation(token)){
						return false;
					}
				}

				// If there was at least one non-empty queue but it appears empty at the time
				// we try to dequeue from it, we need to make sure every queue's been tried
				if(static_cast<ProducerBase *>(token.currentProducer)->dequeue(item)){
					if(++token.itemsConsumedFromCurrent==EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE){
						globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed);
					}
					return true;
				}
				auto tail=producerListTail.load(std::memory_order_acquire);
				auto paramRegPtr=static_cast<ProducerBase *>(token.currentProducer)->next_prod();
				if(paramRegPtr==nullptr){
					paramRegPtr=tail;
				}
				while(paramRegPtr!=static_cast<ProducerBase *>(token.currentProducer)){
					if(paramRegPtr->dequeue(item)){
						token.currentProducer=paramRegPtr;
						token.itemsConsumedFromCurrent=1;
						return true;
					}
					paramRegPtr=paramRegPtr->next_prod();
					if(paramRegPtr==nullptr){
						paramRegPtr=tail;
					}
				}
				return false;
			}
			// Attempts to dequeue several elements from the queue.
			// Returns the number of items actually dequeued.
			// Returns 0 if all producer streams appeared empty at the time they
			// were checked (so, the queue is likely but not guaranteed to be empty).
			// Never allocates. Thread-safe.
			template<typename It>
			u_index try_dequeue_bulk(It itemFirst, u_index max){
				u_index count=0;
				for(auto paramRegPtr=producerListTail.load(std::memory_order_acquire);paramRegPtr!=nullptr;paramRegPtr=paramRegPtr->next_prod()){
					count+=paramRegPtr->dequeue_bulk(itemFirst, max-count);
					if(count==max){
						break;
					}
				}
				return count;
			}
			// Attempts to dequeue several elements from the queue using an explicit consumer token.
			// Returns the number of items actually dequeued.
			// Returns 0 if all producer streams appeared empty at the time they
			// were checked (so, the queue is likely but not guaranteed to be empty).
			// Never allocates. Thread-safe.
			template<typename It>
			u_index try_dequeue_bulk(consumer_token_t &token, It itemFirst, u_index max){
				if(token.desiredProducer==nullptr||token.lastKnownGlobalOffset!=globalExplicitConsumerOffset.load(std::memory_order_relaxed)){
					if(!update_current_producer_after_rotation(token)){
						return 0;
					}
				}
				u_index count=static_cast<ProducerBase *>(token.currentProducer)->dequeue_bulk(itemFirst, max);
				if(count==max){
					if((token.itemsConsumedFromCurrent+=static_cast<std::uint32_t>(max))>=EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE){
						globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed);
					}
					return max;
				}
				token.itemsConsumedFromCurrent+=static_cast<std::uint32_t>(count);
				max-=count;
				auto tail=producerListTail.load(std::memory_order_acquire);
				auto paramRegPtr=static_cast<ProducerBase *>(token.currentProducer)->next_prod();
				if(paramRegPtr==nullptr){
					paramRegPtr=tail;
				}
				while(paramRegPtr!=static_cast<ProducerBase *>(token.currentProducer)){
					auto dequeued=paramRegPtr->dequeue_bulk(itemFirst, max);
					count+=dequeued;
					if(dequeued!=0){
						token.currentProducer=paramRegPtr;
						token.itemsConsumedFromCurrent=static_cast<std::uint32_t>(dequeued);
					}
					if(dequeued==max){
						break;
					}
					max-=dequeued;
					paramRegPtr=paramRegPtr->next_prod();
					if(paramRegPtr==nullptr){
						paramRegPtr=tail;
					}
				}
				return count;
			}
			// Attempts to dequeue from a specific producer's inner queue.
			// If you happen to know which producer you want to dequeue from, this
			// is significantly faster than using the general-case try_dequeue methods.
			// Returns false if the producer's queue appeared empty at the time it
			// was checked (so, the queue is likely but not guaranteed to be empty).
			// Never allocates. Thread-safe.
			template<typename U>
			inline bool try_dequeue_from_producer(producer_token_t const &producer, U &item){
				return static_cast<ExplicitProducer *>(producer.producer)->dequeue(item);
			}
			// Attempts to dequeue several elements from a specific producer's inner queue.
			// Returns the number of items actually dequeued.
			// If you happen to know which producer you want to dequeue from, this
			// is significantly faster than using the general-case try_dequeue methods.
			// Returns 0 if the producer's queue appeared empty at the time it
			// was checked (so, the queue is likely but not guaranteed to be empty).
			// Never allocates. Thread-safe.
			template<typename It>
			inline u_index try_dequeue_bulk_from_producer(producer_token_t const &producer, It itemFirst, u_index max){
				return static_cast<ExplicitProducer *>(producer.producer)->dequeue_bulk(itemFirst, max);
			}
			// Returns an estimate of the total number of elements currently in the queue. This
			// estimate is only accurate if the queue has completely stabilized before it is called
			// (i.e. all enqueue and dequeue operations have completed and their memory effects are
			// visible on the calling thread, and no further operations start while this method is
			// being called).
			// Thread-safe.
			u_index size_approx() const{
				u_index size=0;
				for(auto paramRegPtr=producerListTail.load(std::memory_order_acquire);paramRegPtr!=nullptr;paramRegPtr=paramRegPtr->next_prod()){
					size+=paramRegPtr->size_approx();
				}
				return size;
			}
			// Returns true if the underlying atomic variables used by
			// the queue are lock-free (they should be on most platforms).
			// Thread-safe.
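			// Since this method is constexpr, the check can even be done at compile
			// time (illustrative only; whether it passes is platform-dependent):
			//     static_assert(ConcurrentQueue<int>::is_lock_free(), "atomics are not lock-free here");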
			static constexpr bool is_lock_free(){
				return details::static_is_lock_free<bool>::value==2&&
					   details::static_is_lock_free<u_index>::value==2&&
					   details::static_is_lock_free<std::uint32_t>::value==2&&
					   details::static_is_lock_free<index_t>::value==2&&
					   details::static_is_lock_free<void *>::value==2&&
					   details::static_is_lock_free<typename details::thread_id_converter<details::thread_id_t>::thread_id_numeric_u_index>::value==2;
			}
			private:
			friend struct ProducerToken;
			friend struct ConsumerToken;
			struct ExplicitProducer;
			friend struct ExplicitProducer;
			struct ImplicitProducer;
			friend struct ImplicitProducer;
			friend class ConcurrentQueueTests;
			enum AllocationMode{CanAlloc, CannotAlloc};
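			// CanAlloc is passed down from the enqueue family of methods (which may
			// allocate memory as needed); CannotAlloc from the try_enqueue family,
			// which must never allocate.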


			///////////////////////////////
			// Queue methods
			///////////////////////////////

			template<AllocationMode canAlloc, typename U>
			inline bool inner_enqueue(producer_token_t const &token, U &&element){
				return static_cast<ExplicitProducer *>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue<canAlloc>(std::forward<U>(element));
			}
			template<AllocationMode canAlloc, typename U>
			inline bool inner_enqueue(U &&element){
				auto producer=get_or_add_implicit_producer();
				return producer==nullptr?false:producer->ConcurrentQueue::ImplicitProducer::template enqueue<canAlloc>(std::forward<U>(element));
			}
			template<AllocationMode canAlloc, typename It>
			inline bool inner_enqueue_bulk(producer_token_t const &token, It itemFirst, u_index count){
				return static_cast<ExplicitProducer *>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue_bulk<canAlloc>(itemFirst, count);
			}
			template<AllocationMode canAlloc, typename It>
			inline bool inner_enqueue_bulk(It itemFirst, u_index count){
				auto producer=get_or_add_implicit_producer();
				return producer==nullptr?false:producer->ConcurrentQueue::ImplicitProducer::template enqueue_bulk<canAlloc>(itemFirst, count);
			}
			inline bool update_current_producer_after_rotation(consumer_token_t &token){
				// Ah, there's been a rotation, figure out where we should be!
				auto tail=producerListTail.load(std::memory_order_acquire);
				if(token.desiredProducer==nullptr&&tail==nullptr){
					return false;
				}
				auto prodCount=producerCount.load(std::memory_order_relaxed);
				auto globalOffset=globalExplicitConsumerOffset.load(std::memory_order_relaxed);
				if((details::unlikely)(token.desiredProducer==nullptr)){
					// Aha, first time we're dequeueing anything.
					// Figure out our local position
					// Note: offset is from start, not end, but we're traversing from end -- subtract from count first
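					// For example (hypothetical numbers): with prodCount == 4 and
					// token.initialOffset == 5, offset == 4 - 1 - (5 % 4) == 2, so we
					// advance two producers along from the tail before settling on a start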
					std::uint32_t offset=prodCount-1-(token.initialOffset%prodCount);
					token.desiredProducer=tail;
					for(std::uint32_t i=0;i!=offset;++i){
						token.desiredProducer=static_cast<ProducerBase *>(token.desiredProducer)->next_prod();
						if(token.desiredProducer==nullptr){
							token.desiredProducer=tail;
						}
					}
				}
				std::uint32_t delta=globalOffset-token.lastKnownGlobalOffset;
				if(delta>=prodCount){
					delta=delta%prodCount;
				}
				for(std::uint32_t i=0;i!=delta;++i){
					token.desiredProducer=static_cast<ProducerBase *>(token.desiredProducer)->next_prod();
					if(token.desiredProducer==nullptr){
						token.desiredProducer=tail;
					}
				}
				token.lastKnownGlobalOffset=globalOffset;
				token.currentProducer=token.desiredProducer;
				token.itemsConsumedFromCurrent=0;
				return true;
			}


			///////////////////////////
			// Free list
			///////////////////////////

			template<typename N>
			struct FreeListNode{
				FreeListNode(): freeListRefs(0), freeListNext(nullptr)
				{}
				std::atomic <std::uint32_t> freeListRefs;
				std::atomic<N*>freeListNext;
			};
			// A simple CAS-based lock-free free list. Not the fastest thing in the world under heavy contention, but
			// simple and correct (assuming nodes are never freed until after the free list is destroyed), and fairly
			// speedy under low contention.
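			// Each node's freeListRefs packs two things into one 32-bit atomic: the low
			// 31 bits (REFS_MASK) hold the reference count, and the top bit
			// (SHOULD_BE_ON_FREELIST) marks a node that must be (re-)added to the list
			// as soon as its reference count drains back to zero (see add() and try_get()).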
			template<typename N>        // N must inherit FreeListNode or have the same fields (and initialization of them)
			struct FreeList{
				FreeList(): freeListHead(nullptr)
				{}
				FreeList(FreeList&&other): freeListHead(other.freeListHead.load(std::memory_order_relaxed))
				{other.freeListHead.store(nullptr, std::memory_order_relaxed);}
				void swap(FreeList &other){details::swap_relaxed(freeListHead, other.freeListHead);}
				FreeList(FreeList const &) MOODYCAMEL_DELETE_FUNCTION;
				FreeList &operator=(FreeList const &) MOODYCAMEL_DELETE_FUNCTION;
				inline void add(N *node){
#ifdef MCDBGQ_NOLOCKFREE_FREELIST
					debug::DebugLock lock(mutex);
#endif
					// We know that the should-be-on-freelist bit is 0 at this point, so it's safe to
					// set it using a fetch_add
					if(node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST, std::memory_order_acq_rel)==0){
						// Oh look! We were the last ones referencing this node, and we know
						// we want to add it to the free list, so let's do it!
						add_knowing_refcount_is_zero(node);
					}
				}
				inline N *try_get(){
#ifdef MCDBGQ_NOLOCKFREE_FREELIST
					debug::DebugLock lock(mutex);
#endif
					auto head=freeListHead.load(std::memory_order_acquire);
					while(head!=nullptr){
						auto prevHead=head;
						auto refs=head->freeListRefs.load(std::memory_order_relaxed);
						if((refs&REFS_MASK)==0||!head->freeListRefs.compare_exchange_strong(refs, refs+1, std::memory_order_acquire, std::memory_order_relaxed)){
							head=freeListHead.load(std::memory_order_acquire);
							continue;
						}

						// Good, reference count has been incremented (it wasn't at zero), which means we can read the
						// next and not worry about it changing between now and the time we do the CAS
						auto next=head->freeListNext.load(std::memory_order_relaxed);
						if(freeListHead.compare_exchange_strong(head, next, std::memory_order_acquire, std::memory_order_relaxed)){
							// Yay, got the node. This means it was on the list, which means shouldBeOnFreeList must be false no
							// matter the refcount (because nobody else knows it's been taken off yet, it can't have been put back on).
							assert((head->freeListRefs.load(std::memory_order_relaxed)&SHOULD_BE_ON_FREELIST)==0);

							// Decrease refcount twice, once for our ref, and once for the list's ref
							head->freeListRefs.fetch_sub(2, std::memory_order_release);
							return head;
						}

						// OK, the head must have changed on us, but we still need to decrease the refcount we increased.
						// Note that we don't need to release any memory effects, but we do need to ensure that the reference
						// count decrement happens-after the CAS on the head.
						refs=prevHead->freeListRefs.fetch_sub(1, std::memory_order_acq_rel);
						if(refs==SHOULD_BE_ON_FREELIST+1){
							add_knowing_refcount_is_zero(prevHead);
						}
					}
					return nullptr;
				}
				// Useful for traversing the list when there's no contention (e.g. to destroy remaining nodes)
				N *head_unsafe() const{return freeListHead.load(std::memory_order_relaxed);}
				private:
				inline void add_knowing_refcount_is_zero(N *node){
					// Since the refcount is zero, and nobody can increase it once it's zero (except us, and we run
					// only one copy of this method per node at a time, i.e. the single thread case), then we know
					// we can safely change the next pointer of the node; however, once the refcount is back above
					// zero, then other threads could increase it (happens under heavy contention, when the refcount
					// goes to zero in between a load and a refcount increment of a node in try_get, then back up to
					// something non-zero, then the refcount increment is done by the other thread) -- so, if the CAS
					// to add the node to the actual list fails, decrease the refcount and leave the add operation to
					// the next thread who puts the refcount back at zero (which could be us, hence the loop).
					auto head=freeListHead.load(std::memory_order_relaxed);
					while(true){
						node->freeListNext.store(head, std::memory_order_relaxed);
						node->freeListRefs.store(1, std::memory_order_release);
						if(!freeListHead.compare_exchange_strong(head, node, std::memory_order_release, std::memory_order_relaxed)){
							// Hmm, the add failed, but we can only try again when the refcount goes back to zero
							if(node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST-1, std::memory_order_release)==1){
								continue;
							}
						}
						return;
					}
				}
				private:
				// Implemented like a stack, but where node order doesn't matter (nodes are inserted out of order under contention)
				std::atomic<N*>freeListHead;
				static const std::uint32_t REFS_MASK=0x7FFFFFFF;
				static const std::uint32_t SHOULD_BE_ON_FREELIST=0x80000000;
#ifdef MCDBGQ_NOLOCKFREE_FREELIST
				debug::DebugMutex mutex;
#endif
			};


			///////////////////////////
			// Block
			///////////////////////////

			enum InnerQueueContext{implicit_context=0, explicit_context=1};
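			// A block tracks its emptiness in one of two ways, chosen at compile time:
			// in the explicit context with small blocks (BLOCK_SIZE <=
			// EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) it keeps one atomic flag per slot
			// (emptyFlags); in every other case it keeps a single shared count of
			// completely dequeued elements (elementsCompletelyDequeued).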
			struct Block{
				Block(): next(nullptr), elementsCompletelyDequeued(0), freeListRefs(0), freeListNext(nullptr), dynamicallyAllocated(true)
				{
#ifdef MCDBGQ_TRACKMEM
					owner = nullptr;
#endif
				}
				template<InnerQueueContext context>
				inline bool is_empty() const{
					MOODYCAMEL_CONSTEXPR_IF(context==explicit_context&&BLOCK_SIZE<=EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD){
						// Check flags
						for(u_index i=0;i<BLOCK_SIZE;++i){
							if(!emptyFlags[i].load(std::memory_order_relaxed)){
								return false;
							}
						}

						// Aha, empty; make sure we have all other memory effects that happened before the empty flags were set
						std::atomic_thread_fence(std::memory_order_acquire);
						return true;
					}else{
						// Check counter
						if(elementsCompletelyDequeued.load(std::memory_order_relaxed)==BLOCK_SIZE){
							std::atomic_thread_fence(std::memory_order_acquire);
							return true;
						}
						assert(elementsCompletelyDequeued.load(std::memory_order_relaxed)<=BLOCK_SIZE);
						return false;
					}
				}
				// Returns true if the block is now empty (does not apply in explicit context)
				template<InnerQueueContext context>
				inline bool set_empty(MOODYCAMEL_MAYBE_UNUSED index_t i){
					MOODYCAMEL_CONSTEXPR_IF(context==explicit_context&&BLOCK_SIZE<=EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD){
						// Set flag
						assert(!emptyFlags[BLOCK_SIZE-1-static_cast<u_index>(i&static_cast<index_t>(BLOCK_SIZE-1))].load(std::memory_order_relaxed));
						emptyFlags[BLOCK_SIZE-1-static_cast<u_index>(i&static_cast<index_t>(BLOCK_SIZE-1))].store(true, std::memory_order_release);
						return false;
					}else{
						// Increment counter
						auto prevVal=elementsCompletelyDequeued.fetch_add(1, std::memory_order_release);
						assert(prevVal<BLOCK_SIZE);
						return prevVal==BLOCK_SIZE-1;
					}
				}
				// Sets multiple contiguous item statuses to 'empty' (assumes no wrapping and count > 0).
				// Returns true if the block is now empty (does not apply in explicit context).
				template<InnerQueueContext context>
				inline bool set_many_empty(MOODYCAMEL_MAYBE_UNUSED index_t i, u_index count){
					MOODYCAMEL_CONSTEXPR_IF(context==explicit_context&&BLOCK_SIZE<=EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD){
						// Set flags
						std::atomic_thread_fence(std::memory_order_release);
						i=BLOCK_SIZE-1-static_cast<u_index>(i&static_cast<index_t>(BLOCK_SIZE-1))-count+1;
						for(u_index j=0;j!=count;++j){
							assert(!emptyFlags[i+j].load(std::memory_order_relaxed));
							emptyFlags[i+j].store(true, std::memory_order_relaxed);
						}
						return false;
					}else{
						// Increment counter
						auto prevVal=elementsCompletelyDequeued.fetch_add(count, std::memory_order_release);
						assert(prevVal+count<=BLOCK_SIZE);
						return prevVal+count==BLOCK_SIZE;
					}
				}
				template<InnerQueueContext context>
				inline void set_all_empty(){
					MOODYCAMEL_CONSTEXPR_IF(context==explicit_context&&BLOCK_SIZE<=EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD){
						// Set all flags
						for(u_index i=0;i!=BLOCK_SIZE;++i){
							emptyFlags[i].store(true, std::memory_order_relaxed);
						}
					}else{
						// Reset counter
						elementsCompletelyDequeued.store(BLOCK_SIZE, std::memory_order_relaxed);
					}
				}
				template<InnerQueueContext context>
				inline void reset_empty(){
					MOODYCAMEL_CONSTEXPR_IF(context==explicit_context&&BLOCK_SIZE<=EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD){
						// Reset flags
						for(u_index i=0;i!=BLOCK_SIZE;++i){
							emptyFlags[i].store(false, std::memory_order_relaxed);
						}
					}else{
						// Reset counter
						elementsCompletelyDequeued.store(0, std::memory_order_relaxed);
					}
				}
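				// The operators below mask a global index down to a slot within this
				// block; since BLOCK_SIZE is a power of two, idx & (BLOCK_SIZE - 1) is
				// the index modulo the block size (e.g. with BLOCK_SIZE == 32, idx == 70
				// maps to slot 70 & 31 == 6)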
				inline T *operator[](index_t idx) MOODYCAMEL_NOEXCEPT{return static_cast<T *>(static_cast<void *>(elements))+static_cast<u_index>(idx&static_cast<index_t>(BLOCK_SIZE-1));}
				inline T const *operator[](index_t idx) const MOODYCAMEL_NOEXCEPT{
					return static_cast<T const *>(static_cast<void const *>(elements))+static_cast<u_index>(idx&static_cast<index_t>(BLOCK_SIZE-1));
				}
				private:
				static_assert(std::alignment_of<T>::value<=sizeof(T), "The queue does not support types with an alignment greater than their size at this time");
				MOODYCAMEL_ALIGNED_TYPE_LIKE(char[sizeof(T)*BLOCK_SIZE], T) elements;
				public:
				Block *next;
				std::atomic <u_index> elementsCompletelyDequeued;
				std::atomic<bool> emptyFlags[BLOCK_SIZE<=EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD?BLOCK_SIZE:1];
				public:
				std::atomic <std::uint32_t> freeListRefs;
				std::atomic<Block*>freeListNext;
				bool dynamicallyAllocated;        // Perhaps a better name for this would be 'isNotPartOfInitialBlockPool'

#ifdef MCDBGQ_TRACKMEM
				void* owner;
#endif
			};
			static_assert(std::alignment_of<Block>::value>=std::alignment_of<T>::value, "Internal error: Blocks must be at least as aligned as the type they are wrapping");
#ifdef MCDBGQ_TRACKMEM
			public:
			struct MemStats;
			private:
#endif

			///////////////////////////
			// Producer base
			///////////////////////////

			struct ProducerBase : public details::ConcurrentQueueProducerTypelessBase{
				ProducerBase(ConcurrentQueue *parent_, bool isExplicit_):
						tailIndex(0), headIndex(0), dequeueOptimisticCount(0), dequeueOvercommit(0), tailBlock(nullptr), isExplicit(isExplicit_), parent(parent_)
				{
				}
				virtual ~ProducerBase()
				{}
				template<typename U>
				inline bool dequeue(U &element){
					if(isExplicit){
						return static_cast<ExplicitProducer *>(this)->dequeue(element);
					}else{
						return static_cast<ImplicitProducer *>(this)->dequeue(element);
					}
				}
				template<typename It>
				inline u_index dequeue_bulk(It &itemFirst, u_index max){
					if(isExplicit){
						return static_cast<ExplicitProducer *>(this)->dequeue_bulk(itemFirst, max);
					}else{
						return static_cast<ImplicitProducer *>(this)->dequeue_bulk(itemFirst, max);
					}
				}
				inline ProducerBase *next_prod() const{return static_cast<ProducerBase *>(next);}
				inline u_index size_approx() const{
					auto tail=tailIndex.load(std::memory_order_relaxed);
					auto head=headIndex.load(std::memory_order_relaxed);
					return details::circular_less_than(head, tail)?static_cast<u_index>(tail-head):0;
				}
				inline index_t getTail() const{return tailIndex.load(std::memory_order_relaxed);}
				protected:
				std::atomic <index_t> tailIndex;        // Where to enqueue to next
				std::atomic <index_t> headIndex;        // Where to dequeue from next

				std::atomic <index_t> dequeueOptimisticCount;
				std::atomic <index_t> dequeueOvercommit;
				Block *tailBlock;
				public:
				bool isExplicit;
				ConcurrentQueue *parent;
				protected:
#ifdef MCDBGQ_TRACKMEM
				friend struct MemStats;
#endif
			};


			///////////////////////////
			// Explicit queue
			///////////////////////////

			struct ExplicitProducer : public ProducerBase{
				explicit ExplicitProducer(ConcurrentQueue *parent_):
						ProducerBase(parent_, true), blockIndex(nullptr), pr_blockIndexSlotsUsed(0), pr_blockIndexSize(EXPLICIT_INITIAL_INDEX_SIZE>>1), pr_blockIndexFront(0), pr_blockIndexEntries(nullptr), pr_blockIndexRaw(nullptr)
				{
					u_index poolBasedIndexSize=details::ceil_to_pow_2(parent_->initialBlockPoolSize)>>1;
					if(poolBasedIndexSize>pr_blockIndexSize){
						pr_blockIndexSize=poolBasedIndexSize;
					}
					new_block_index(0);        // This creates an index with double the number of current entries, i.e. EXPLICIT_INITIAL_INDEX_SIZE
				}
				~ExplicitProducer()
				{
					// Destruct any elements not yet dequeued.
					// Since we're in the destructor, we can assume all elements
					// are either completely dequeued or completely not (no halfways).
					if(this->tailBlock!=nullptr){        // Note this means there must be a block index too
						// First find the block that's partially dequeued, if any
						Block *halfDequeuedBlock=nullptr;
						if((this->headIndex.load(std::memory_order_relaxed)&static_cast<index_t>(BLOCK_SIZE-1))!=0){
							// The head's not on a block boundary, meaning a block somewhere is partially dequeued
							// (or the head block is the tail block and was fully dequeued, but the head/tail are still not on a boundary)
							u_index i=(pr_blockIndexFront-pr_blockIndexSlotsUsed)&(pr_blockIndexSize-1);
							while(details::circular_less_than<index_t>(pr_blockIndexEntries[i].base+BLOCK_SIZE, this->headIndex.load(std::memory_order_relaxed))){
								i=(i+1)&(pr_blockIndexSize-1);
							}
							assert(details::circular_less_than<index_t>(pr_blockIndexEntries[i].base, this->headIndex.load(std::memory_order_relaxed)));
							halfDequeuedBlock=pr_blockIndexEntries[i].block;
						}

						// Start at the head block (note the first line in the loop gives us the head from the tail on the first iteration)
						auto block=this->tailBlock;
						do{
							block=block->next;
							if(block->ConcurrentQueue::Block::template is_empty<explicit_context>()){
								continue;
							}
							u_index i=0;    // Offset into block
							if(block==halfDequeuedBlock){
								i=static_cast<u_index>(this->headIndex.load(std::memory_order_relaxed)&static_cast<index_t>(BLOCK_SIZE-1));
							}

							// Walk through all the items in the block; if this is the tail block, we need to stop when we reach the tail index
							auto lastValidIndex=(this->tailIndex.load(std::memory_order_relaxed)&static_cast<index_t>(BLOCK_SIZE-1))==0?BLOCK_SIZE:static_cast<u_index>(this->tailIndex.load(std::memory_order_relaxed)&static_cast<index_t>(BLOCK_SIZE-1));
							while(i!=BLOCK_SIZE&&(block!=this->tailBlock||i!=lastValidIndex)){
								(*block)[i++]->~T();
							}
						}while(block!=this->tailBlock);
					}

					// Destroy all blocks that we own
					if(this->tailBlock!=nullptr){
						auto block=this->tailBlock;
						do{
							auto nextBlock=block->next;
							this->parent->add_block_to_free_list(block);
							block=nextBlock;
						}while(block!=this->tailBlock);
					}

					// Destroy the block indices
					auto header=static_cast<BlockIndexHeader *>(pr_blockIndexRaw);
					while(header!=nullptr){
						auto prev=static_cast<BlockIndexHeader *>(header->prev);
						header->~BlockIndexHeader();
						(Traits::free)(header);
						header=prev;
					}
				}
				template<AllocationMode allocMode, typename U>
				inline bool enqueue(U &&element){
					index_t currentTailIndex=this->tailIndex.load(std::memory_order_relaxed);
					index_t newTailIndex=1+currentTailIndex;
					if((currentTailIndex&static_cast<index_t>(BLOCK_SIZE-1))==0){
						// We reached the end of a block, start a new one
						auto startBlock=this->tailBlock;
						auto originalBlockIndexSlotsUsed=pr_blockIndexSlotsUsed;
						if(this->tailBlock!=nullptr&&this->tailBlock->next->ConcurrentQueue::Block::template is_empty<explicit_context>()){
							// We can re-use the block ahead of us, it's empty!
							this->tailBlock=this->tailBlock->next;
							this->tailBlock->ConcurrentQueue::Block::template reset_empty<explicit_context>();

							// We'll put the block on the block index (guaranteed to be room since we're conceptually removing the
							// last block from it first -- except instead of removing then adding, we can just overwrite).
							// Note that there must be a valid block index here, since even if allocation failed in the ctor,
							// it would have been re-attempted when adding the first block to the queue; since there is such
							// a block, a block index must have been successfully allocated.
						}else{
							// Whatever head value we see here is >= the last value we saw here (relatively),
							// and <= its current value. Since we have the most recent tail, the head must be
							// <= to it.
							auto head=this->headIndex.load(std::memory_order_relaxed);
							assert(!details::circular_less_than<index_t>(currentTailIndex, head));
							if(!details::circular_less_than<index_t>(head, currentTailIndex+BLOCK_SIZE)||
							   (MAX_SUBQUEUE_SIZE!=details::const_numeric_max<u_index>::value&&(MAX_SUBQUEUE_SIZE==0||MAX_SUBQUEUE_SIZE-BLOCK_SIZE<currentTailIndex-head))){
								// We can't enqueue in another block because there's not enough leeway -- the
								// tail could surpass the head by the time the block fills up! (Or we'll exceed
								// the size limit, if the second part of the condition was true.)
								return false;
							}
							// We're going to need a new block; check that the block index has room
							if(pr_blockIndexRaw==nullptr||pr_blockIndexSlotsUsed==pr_blockIndexSize){
								// Hmm, the circular block index is already full -- we'll need
								// to allocate a new index. Note pr_blockIndexRaw can only be nullptr if
								// the initial allocation failed in the constructor.

								MOODYCAMEL_CONSTEXPR_IF(allocMode==CannotAlloc){
									return false;
								}else if(!new_block_index(pr_blockIndexSlotsUsed)){
									return false;
								}
							}

							// Insert a new block in the circular linked list
							auto newBlock=this->parent->ConcurrentQueue::template requisition_block<allocMode>();
							if(newBlock==nullptr){
								return false;
							}
#ifdef MCDBGQ_TRACKMEM
							newBlock->owner = this;
#endif
							newBlock->ConcurrentQueue::Block::template reset_empty<explicit_context>();
							if(this->tailBlock==nullptr){
								newBlock->next=newBlock;
							}else{
								newBlock->next=this->tailBlock->next;
								this->tailBlock->next=newBlock;
							}
							this->tailBlock=newBlock;
							++pr_blockIndexSlotsUsed;
						}
						MOODYCAMEL_CONSTEXPR_IF(!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new(static_cast<T *>(nullptr)) T(std::forward<U>(element)))){
							// The constructor may throw. We want the element not to appear in the queue in
							// that case (without corrupting the queue):
							MOODYCAMEL_TRY{
								new((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element));
							}MOODYCAMEL_CATCH (...){
								// Revert change to the current block, but leave the new block available
								// for next time
								pr_blockIndexSlotsUsed=originalBlockIndexSlotsUsed;
								this->tailBlock=startBlock==nullptr?this->tailBlock:startBlock;
								MOODYCAMEL_RETHROW;
							}
						}else{
							(void)startBlock;
							(void)originalBlockIndexSlotsUsed;
						}

						// Add block to block index
						auto &entry=blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];
						entry.base=currentTailIndex;
						entry.block=this->tailBlock;
						blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, std::memory_order_release);
						pr_blockIndexFront=(pr_blockIndexFront+1)&(pr_blockIndexSize-1);
						MOODYCAMEL_CONSTEXPR_IF(!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new(static_cast<T *>(nullptr)) T(std::forward<U>(element)))){
							this->tailIndex.store(newTailIndex, std::memory_order_release);
							return true;
						}
					}

					// Enqueue
					new((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element));
					this->tailIndex.store(newTailIndex, std::memory_order_release);
					return true;
				}
				template<typename U>
				bool dequeue(U &element){
					auto tail=this->tailIndex.load(std::memory_order_relaxed);
					auto overcommit=this->dequeueOvercommit.load(std::memory_order_relaxed);
					if(details::circular_less_than<index_t>(this->dequeueOptimisticCount.load(std::memory_order_relaxed)-overcommit, tail)){
						// Might be something to dequeue, let's give it a try

						// Note that this if is purely for performance purposes in the common case when the queue is
						// empty and the values are eventually consistent -- we may enter here spuriously.

						// Note that whatever the values of overcommit and tail are, they are not going to change (unless we
						// change them) and must be the same value at this point (inside the if) as when the if condition was
						// evaluated.

						// We insert an acquire fence here to synchronize-with the release upon incrementing dequeueOvercommit below.
						// This ensures that whatever value we loaded into overcommit, the load of dequeueOptimisticCount in
						// the fetch_add below will result in a value at least as recent as that (and therefore at least as large).
						// Note that I believe a compiler (signal) fence here would be sufficient due to the nature of fetch_add (all
						// read-modify-write operations are guaranteed to work on the latest value in the modification order), but
						// unfortunately that can't be shown to be correct using only the C++11 standard.
						// See http://stackoverflow.com/questions/18223161/what-are-the-c11-memory-ordering-guarantees-in-this-corner-case
						std::atomic_thread_fence(std::memory_order_acquire);

						// Increment optimistic counter, then check if it went over the boundary
						auto myDequeueCount=this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed);

						// Note that since dequeueOvercommit must be <= dequeueOptimisticCount (because dequeueOvercommit is only ever
						// incremented after dequeueOptimisticCount -- this is enforced in the `else` block below), and since we now
						// have a version of dequeueOptimisticCount that is at least as recent as overcommit (due to the release upon
						// incrementing dequeueOvercommit and the acquire above that synchronizes with it), overcommit <= myDequeueCount.
						// However, we can't assert this since both dequeueOptimisticCount and dequeueOvercommit may (independently)
						// overflow; in such a case, though, the logic still holds since the difference between the two is maintained.

						// Note that we reload tail here in case it changed; it will be the same value as before or greater, since
						// this load is sequenced after (happens after) the earlier load above. This is supported by read-read
						// coherency (as defined in the standard), explained here: http://en.cppreference.com/w/cpp/atomic/memory_order
						tail=this->tailIndex.load(std::memory_order_acquire);
						if((details::likely)(details::circular_less_than<index_t>(myDequeueCount-overcommit, tail))){
							// Guaranteed to be at least one element to dequeue!

							// Get the index. Note that since there's guaranteed to be at least one element, this
							// will never exceed tail. We need to do an acquire-release fence here since it's possible
							// that whatever condition got us to this point was for an earlier enqueued element (that
							// we already see the memory effects for), but that by the time we increment somebody else
						// has incremented it, and we need to see the memory effects for *that* element,
						// which in such a case are necessarily visible on the thread that incremented it in the first
							// place with the more current condition (they must have acquired a tail that is at least
							// as recent).
							auto index=this->headIndex.fetch_add(1, std::memory_order_acq_rel);


							// Determine which block the element is in

							auto localBlockIndex=blockIndex.load(std::memory_order_acquire);
							auto localBlockIndexHead=localBlockIndex->front.load(std::memory_order_acquire);

							// We need to be careful here about subtracting and dividing because of index wrap-around.
							// When an index wraps, we need to preserve the sign of the offset when dividing it by the
							// block size (in order to get a correct signed block count offset in all cases):
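							// For example (hypothetical 32-bit index_t, BLOCK_SIZE == 32): headBase == 0xFFFFFFE0
							// and blockBaseIndex == 0x00000020 after a wrap give an unsigned difference of 0x40,
							// i.e. +64 when reinterpreted as signed, for an offset of +2 blocks; with the operands
							// reversed the difference is -64, i.e. -2 blocks -- the sign survives the wrap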
							auto headBase=localBlockIndex->entries[localBlockIndexHead].base;
							auto blockBaseIndex=index&~static_cast<index_t>(BLOCK_SIZE-1);
							auto offset=static_cast<u_index>(static_cast<typename std::make_signed<index_t>::type>(blockBaseIndex-headBase)/
															static_cast<typename std::make_signed<index_t>::type>(BLOCK_SIZE));
							auto block=localBlockIndex->entries[(localBlockIndexHead+offset)&(localBlockIndex->size-1)].block;

							// Dequeue
							auto &el=*((*block)[index]);
							if(!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element=std::move(el))){
								// Make sure the element is still fully dequeued and destroyed even if the assignment
								// throws
								struct Guard{
									Block *block;
									index_t index;
									~Guard(){
										(*block)[index]->~T();
										block->ConcurrentQueue::Block::template set_empty<explicit_context>(index);
									}
								}guard={block, index};
								element=std::move(el); // NOLINT
							}else{
								element=std::move(el); // NOLINT
								el.~T(); // NOLINT
								block->ConcurrentQueue::Block::template set_empty<explicit_context>(index);
							}
							return true;
						}else{
							// Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent
							this->dequeueOvercommit.fetch_add(1, std::memory_order_release);        // Release so that the fetch_add on dequeueOptimisticCount is guaranteed to happen before this write
						}
					}
					return false;
				}
				template<AllocationMode allocMode, typename It>
				bool MOODYCAMEL_NO_TSAN enqueue_bulk(It itemFirst, u_index count){
					// First, we need to make sure we have enough room to enqueue all of the elements;
					// this means pre-allocating blocks and putting them in the block index (but only if
					// all the allocations succeeded).
					index_t startTailIndex=this->tailIndex.load(std::memory_order_relaxed);
					auto startBlock=this->tailBlock;
					auto originalBlockIndexFront=pr_blockIndexFront;
					auto originalBlockIndexSlotsUsed=pr_blockIndexSlotsUsed;
					Block *firstAllocatedBlock=nullptr;

					// Figure out how many blocks we'll need to allocate, and do so
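					// (Worked example, assuming BLOCK_SIZE == 32: startTailIndex == 30 and count == 10
					// give ((39) & ~31) - ((29) & ~31) == 32 - 0 == 32, i.e. exactly one new block for
					// the elements that spill past the current one)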
					u_index blockBaseDiff=((startTailIndex+count-1)&~static_cast<index_t>(BLOCK_SIZE-1))-((startTailIndex-1)&~static_cast<index_t>(BLOCK_SIZE-1));
					index_t currentTailIndex=(startTailIndex-1)&~static_cast<index_t>(BLOCK_SIZE-1);
					if(blockBaseDiff>0){
						// Allocate as many blocks as possible from ahead
						while(blockBaseDiff>0&&
							  this->tailBlock!=nullptr&&
							  this->tailBlock->next!=firstAllocatedBlock&&
							  this->tailBlock->next->ConcurrentQueue::Block::template is_empty<explicit_context>()){
							blockBaseDiff-=static_cast<index_t>(BLOCK_SIZE);
							currentTailIndex+=static_cast<index_t>(BLOCK_SIZE);
							this->tailBlock=this->tailBlock->next;
							firstAllocatedBlock=firstAllocatedBlock==nullptr?this->tailBlock:firstAllocatedBlock;
							auto &entry=blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];
							entry.base=currentTailIndex;
							entry.block=this->tailBlock;
							pr_blockIndexFront=(pr_blockIndexFront+1)&(pr_blockIndexSize-1);
						}

						// Now allocate as many blocks as necessary from the block pool
						while(blockBaseDiff>0){
							blockBaseDiff-=static_cast<index_t>(BLOCK_SIZE);
							currentTailIndex+=static_cast<index_t>(BLOCK_SIZE);
							auto head=this->headIndex.load(std::memory_order_relaxed);
							assert(!details::circular_less_than<index_t>(currentTailIndex, head));
							bool full=!details::circular_less_than<index_t>(head, currentTailIndex+BLOCK_SIZE)||
									  (MAX_SUBQUEUE_SIZE!=details::const_numeric_max<u_index>::value&&(MAX_SUBQUEUE_SIZE==0||MAX_SUBQUEUE_SIZE-BLOCK_SIZE<currentTailIndex-head));
							if(pr_blockIndexRaw==nullptr||pr_blockIndexSlotsUsed==pr_blockIndexSize||full){
								MOODYCAMEL_CONSTEXPR_IF(allocMode==CannotAlloc){
									// Failed to allocate, undo changes (but keep injected blocks)
									pr_blockIndexFront=originalBlockIndexFront;
									pr_blockIndexSlotsUsed=originalBlockIndexSlotsUsed;
									this->tailBlock=startBlock==nullptr?firstAllocatedBlock:startBlock;
									return false;
								}else if(full||!new_block_index(originalBlockIndexSlotsUsed)){
									// Failed to allocate, undo changes (but keep injected blocks)
									pr_blockIndexFront=originalBlockIndexFront;
									pr_blockIndexSlotsUsed=originalBlockIndexSlotsUsed;
									this->tailBlock=startBlock==nullptr?firstAllocatedBlock:startBlock;
									return false;
								}

								// pr_blockIndexFront is updated inside new_block_index, so we need to
								// update our fallback value too (since we keep the new index even if we
								// later fail)
								originalBlockIndexFront=originalBlockIndexSlotsUsed;
							}

							// Insert a new block in the circular linked list
							auto newBlock=this->parent->ConcurrentQueue::template requisition_block<allocMode>();
							if(newBlock==nullptr){
								pr_blockIndexFront=originalBlockIndexFront;
								pr_blockIndexSlotsUsed=originalBlockIndexSlotsUsed;
								this->tailBlock=startBlock==nullptr?firstAllocatedBlock:startBlock;
								return false;
							}
#ifdef MCDBGQ_TRACKMEM
							newBlock->owner = this;
#endif
							newBlock->ConcurrentQueue::Block::template set_all_empty<explicit_context>();
							if(this->tailBlock==nullptr){
								newBlock->next=newBlock;
							}else{
								newBlock->next=this->tailBlock->next;
								this->tailBlock->next=newBlock;
							}
							this->tailBlock=newBlock;
							firstAllocatedBlock=firstAllocatedBlock==nullptr?this->tailBlock:firstAllocatedBlock;
							++pr_blockIndexSlotsUsed;
							auto &entry=blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];
							entry.base=currentTailIndex;
							entry.block=this->tailBlock;
							pr_blockIndexFront=(pr_blockIndexFront+1)&(pr_blockIndexSize-1);
						}

						// Excellent, all allocations succeeded. Reset each block's emptiness before we fill them up, and
						// publish the new block index front
						auto block=firstAllocatedBlock;
						while(true){
							block->ConcurrentQueue::Block::template reset_empty<explicit_context>();
							if(block==this->tailBlock){
								break;
							}
							block=block->next;
						}
						MOODYCAMEL_CONSTEXPR_IF(MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new(static_cast<T *>(nullptr)) T(details::deref_noexcept(itemFirst)))){
							blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront-1)&(pr_blockIndexSize-1), std::memory_order_release);
						}
					}

					// Enqueue, one block at a time
					index_t newTailIndex=startTailIndex+static_cast<index_t>(count);
					currentTailIndex=startTailIndex;
					auto endBlock=this->tailBlock;
					this->tailBlock=startBlock;
					assert((startTailIndex&static_cast<index_t>(BLOCK_SIZE-1))!=0||firstAllocatedBlock!=nullptr||count==0);
					if((startTailIndex&static_cast<index_t>(BLOCK_SIZE-1))==0&&firstAllocatedBlock!=nullptr){
						this->tailBlock=firstAllocatedBlock;
					}
					while(true){
						index_t stopIndex=(currentTailIndex&~static_cast<index_t>(BLOCK_SIZE-1))+static_cast<index_t>(BLOCK_SIZE);
						if(details::circular_less_than<index_t>(newTailIndex, stopIndex)){
							stopIndex=newTailIndex;
						}
						MOODYCAMEL_CONSTEXPR_IF(MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new(static_cast<T *>(nullptr)) T(details::deref_noexcept(itemFirst)))){
							while(currentTailIndex!=stopIndex){
								new((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++);
							}
						}else{
							MOODYCAMEL_TRY{
								while(currentTailIndex!=stopIndex){
									// Must use copy constructor even if move constructor is available
									// because we may have to revert if there's an exception.
									// Sorry about the horrible templated next line, but it was the only way
									// to disable moving *at compile time*, which is important because a type
									// may only define a (noexcept) move constructor, and so calls to the
									// cctor will not compile, even if they are in an if branch that will never
									// be executed
									new((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new(static_cast<T *>(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));
									++currentTailIndex;
									++itemFirst;
								}
							}MOODYCAMEL_CATCH (...){
								// Oh dear, an exception's been thrown -- destroy the elements that
								// were enqueued so far and revert the entire bulk operation (we'll keep
								// any allocated blocks in our linked list for later, though).
								auto constructedStopIndex=currentTailIndex;
								auto lastBlockEnqueued=this->tailBlock;
								pr_blockIndexFront=originalBlockIndexFront;
								pr_blockIndexSlotsUsed=originalBlockIndexSlotsUsed;
								this->tailBlock=startBlock==nullptr?firstAllocatedBlock:startBlock;
								if(!details::is_trivially_destructible<T>::value){
									auto block=startBlock;
									if((startTailIndex&static_cast<index_t>(BLOCK_SIZE-1))==0){
										block=firstAllocatedBlock;
									}
									currentTailIndex=startTailIndex;
									while(true){
										stopIndex=(currentTailIndex&~static_cast<index_t>(BLOCK_SIZE-1))+static_cast<index_t>(BLOCK_SIZE);
										if(details::circular_less_than<index_t>(constructedStopIndex, stopIndex)){
											stopIndex=constructedStopIndex;
										}
										while(currentTailIndex!=stopIndex){
											(*block)[currentTailIndex++]->~T();
										}
										if(block==lastBlockEnqueued){
											break;
										}
										block=block->next;
									}
								}
								MOODYCAMEL_RETHROW;
							}
						}
						if(this->tailBlock==endBlock){
							assert(currentTailIndex==newTailIndex);
							break;
						}
						this->tailBlock=this->tailBlock->next;
					}
					MOODYCAMEL_CONSTEXPR_IF(!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new(static_cast<T *>(nullptr)) T(details::deref_noexcept(itemFirst)))){
						if(firstAllocatedBlock!=nullptr)
							blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront-1)&(pr_blockIndexSize-1), std::memory_order_release);
					}
					this->tailIndex.store(newTailIndex, std::memory_order_release);
					return true;
				}
				template<typename It>
				u_index dequeue_bulk(It &itemFirst, u_index max){
					auto tail=this->tailIndex.load(std::memory_order_relaxed);
					auto overcommit=this->dequeueOvercommit.load(std::memory_order_relaxed);
					auto desiredCount=static_cast<u_index>(tail-(this->dequeueOptimisticCount.load(std::memory_order_relaxed)-overcommit));
					if(details::circular_less_than<u_index>(0, desiredCount)){
						desiredCount=desiredCount<max?desiredCount:max;
						std::atomic_thread_fence(std::memory_order_acquire);
						auto myDequeueCount=this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed);
						tail=this->tailIndex.load(std::memory_order_acquire);
						auto actualCount=static_cast<u_index>(tail-(myDequeueCount-overcommit));
						if(details::circular_less_than<u_index>(0, actualCount)){
							actualCount=desiredCount<actualCount?desiredCount:actualCount;
							if(actualCount<desiredCount){
								this->dequeueOvercommit.fetch_add(desiredCount-actualCount, std::memory_order_release);
							}

							// Get the first index. Note that since there's guaranteed to be at least actualCount elements, this
							// will never exceed tail.
							auto firstIndex=this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel);

							// Determine which block the first element is in
							auto localBlockIndex=blockIndex.load(std::memory_order_acquire);
							auto localBlockIndexHead=localBlockIndex->front.load(std::memory_order_acquire);
							auto headBase=localBlockIndex->entries[localBlockIndexHead].base;
							auto firstBlockBaseIndex=firstIndex&~static_cast<index_t>(BLOCK_SIZE-1);
							auto offset=static_cast<u_index>(static_cast<typename std::make_signed<index_t>::type>(firstBlockBaseIndex-headBase)/
															static_cast<typename std::make_signed<index_t>::type>(BLOCK_SIZE));
							auto indexIndex=(localBlockIndexHead+offset)&(localBlockIndex->size-1);

							// Iterate the blocks and dequeue
							auto index=firstIndex;
							do{
								auto firstIndexInBlock=index;
								index_t endIndex=(index&~static_cast<index_t>(BLOCK_SIZE-1))+static_cast<index_t>(BLOCK_SIZE);
								endIndex=details::circular_less_than<index_t>(firstIndex+static_cast<index_t>(actualCount), endIndex)?firstIndex+static_cast<index_t>(actualCount):endIndex;
								auto block=localBlockIndex->entries[indexIndex].block;
								if(MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst)=std::move((*(*block)[index])))){
									while(index!=endIndex){
										auto &el=*((*block)[index]);
										*itemFirst++=std::move(el);
										el.~T();
										++index;
									}
								}else{
									MOODYCAMEL_TRY{
										while(index!=endIndex){
											auto &el=*((*block)[index]);
											*itemFirst=std::move(el);
											++itemFirst;
											el.~T();
											++index;
										}
									}MOODYCAMEL_CATCH (...){
										// It's too late to revert the dequeue, but we can make sure that all
										// the dequeued objects are properly destroyed and the block index
										// (and empty count) are properly updated before we propagate the exception
										do{
											block=localBlockIndex->entries[indexIndex].block;
											while(index!=endIndex){
												(*block)[index++]->~T();
											}
											block->ConcurrentQueue::Block::template set_many_empty<explicit_context>(firstIndexInBlock, static_cast<u_index>(endIndex-firstIndexInBlock));
											indexIndex=(indexIndex+1)&(localBlockIndex->size-1);
											firstIndexInBlock=index;
											endIndex=(index&~static_cast<index_t>(BLOCK_SIZE-1))+static_cast<index_t>(BLOCK_SIZE);
											endIndex=details::circular_less_than<index_t>(firstIndex+static_cast<index_t>(actualCount), endIndex)?firstIndex+static_cast<index_t>(actualCount):endIndex;
										}while(index!=firstIndex+actualCount);
										MOODYCAMEL_RETHROW;
									}
								}
								block->ConcurrentQueue::Block::template set_many_empty<explicit_context>(firstIndexInBlock, static_cast<u_index>(endIndex-firstIndexInBlock));
								indexIndex=(indexIndex+1)&(localBlockIndex->size-1);
							}while(index!=firstIndex+actualCount);
							return actualCount;
						}else{
							// Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent
							this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release);
						}
					}
					return 0;
				}
				private:
				struct BlockIndexEntry{
					index_t base;
					Block *block;
				};
				struct BlockIndexHeader{
					u_index size;
					std::atomic <u_index> front;        // Current slot (not next, like pr_blockIndexFront)
					BlockIndexEntry *entries;
					void *prev;
				};
				bool new_block_index(u_index numberOfFilledSlotsToExpose){
					auto prevBlockSizeMask=pr_blockIndexSize-1;

					// Create the new block
					pr_blockIndexSize<<=1;
					auto newRawPtr=static_cast<char *>((Traits::malloc)(sizeof(BlockIndexHeader)+std::alignment_of<BlockIndexEntry>::value-1+sizeof(BlockIndexEntry)*pr_blockIndexSize));
					if(newRawPtr==nullptr){
						pr_blockIndexSize>>=1;        // Reset to allow graceful retry
						return false;
					}
					auto newBlockIndexEntries=reinterpret_cast<BlockIndexEntry *>(details::align_for<BlockIndexEntry>(newRawPtr+sizeof(BlockIndexHeader)));

					// Copy in all the old indices, if any
					u_index j=0;
					if(pr_blockIndexSlotsUsed!=0){
						auto i=(pr_blockIndexFront-pr_blockIndexSlotsUsed)&prevBlockSizeMask;
						do{
							newBlockIndexEntries[j++]=pr_blockIndexEntries[i];
							i=(i+1)&prevBlockSizeMask;
						}while(i!=pr_blockIndexFront);
					}

					// Update everything
					auto header=new(newRawPtr) BlockIndexHeader;
					header->size=pr_blockIndexSize;
					header->front.store(numberOfFilledSlotsToExpose-1, std::memory_order_relaxed);
					header->entries=newBlockIndexEntries;
					header->prev=pr_blockIndexRaw;        // we link the new block to the old one so we can free it later

					pr_blockIndexFront=j;
					pr_blockIndexEntries=newBlockIndexEntries;
					pr_blockIndexRaw=newRawPtr;
					blockIndex.store(header, std::memory_order_release);
					return true;
				}
				private:
				std::atomic<BlockIndexHeader*>blockIndex;
				// To be used by the producer only -- the consumer must use the ones referenced by blockIndex
				u_index pr_blockIndexSlotsUsed;
				u_index pr_blockIndexSize;
				u_index pr_blockIndexFront;        // Next slot (not current)
				BlockIndexEntry *pr_blockIndexEntries;
				void *pr_blockIndexRaw;
#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
				public:
				ExplicitProducer *nextExplicitProducer;
				private:
#endif
#ifdef MCDBGQ_TRACKMEM
				friend struct MemStats;
#endif
			};


			//////////////////////////////////
			// Implicit queue
			//////////////////////////////////

			struct ImplicitProducer : public ProducerBase{
				ImplicitProducer(ConcurrentQueue *parent_): ProducerBase(parent_, false), nextBlockIndexCapacity(IMPLICIT_INITIAL_INDEX_SIZE), blockIndex(nullptr)
				{
					new_block_index();
				}
				~ImplicitProducer()
				{
					// Note that since we're in the destructor we can assume that all enqueue/dequeue operations
					// completed already; this means that all undequeued elements are placed contiguously across
					// contiguous blocks, and that only the first and last remaining blocks can be only partially
					// empty (all other remaining blocks must be completely full).

#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
					// Unregister ourselves for thread termination notification
					if(!this->inactive.load(std::memory_order_relaxed)){
						details::ThreadExitNotifier::unsubscribe(&threadExitListener);
					}
#endif

					// Destroy all remaining elements!
					auto tail=this->tailIndex.load(std::memory_order_relaxed);
					auto index=this->headIndex.load(std::memory_order_relaxed);
					Block *block=nullptr;
					assert(index==tail||details::circular_less_than(index, tail));
					bool forceFreeLastBlock=index!=tail;        // If we enter the loop, then the last (tail) block will not be freed
					while(index!=tail){
						if((index&static_cast<index_t>(BLOCK_SIZE-1))==0||block==nullptr){
							if(block!=nullptr){
								// Free the old block
								this->parent->add_block_to_free_list(block);
							}
							block=get_block_index_entry_for_index(index)->value.load(std::memory_order_relaxed);
						}
						((*block)[index])->~T();
						++index;
					}
					// Even if the queue is empty, there's still one block that's not on the free list
					// (unless the head index reached the end of it, in which case the tail will be poised
					// to create a new block).
					if(this->tailBlock!=nullptr&&(forceFreeLastBlock||(tail&static_cast<index_t>(BLOCK_SIZE-1))!=0)){
						this->parent->add_block_to_free_list(this->tailBlock);
					}

					// Destroy block index
					auto localBlockIndex=blockIndex.load(std::memory_order_relaxed);
					if(localBlockIndex!=nullptr){
						for(u_index i=0;i!=localBlockIndex->capacity;++i){
							localBlockIndex->index[i]->~BlockIndexEntry();
						}
						do{
							auto prev=localBlockIndex->prev;
							localBlockIndex->~BlockIndexHeader();
							(Traits::free)(localBlockIndex);
							localBlockIndex=prev;
						}while(localBlockIndex!=nullptr);
					}
				}
				template<AllocationMode allocMode, typename U>
				inline bool enqueue(U &&element){
					index_t currentTailIndex=this->tailIndex.load(std::memory_order_relaxed);
					index_t newTailIndex=1+currentTailIndex;
					if((currentTailIndex&static_cast<index_t>(BLOCK_SIZE-1))==0){
						// We reached the end of a block, start a new one
						auto head=this->headIndex.load(std::memory_order_relaxed);
						assert(!details::circular_less_than<index_t>(currentTailIndex, head));
						if(!details::circular_less_than<index_t>(head, currentTailIndex+BLOCK_SIZE)||
						   (MAX_SUBQUEUE_SIZE!=details::const_numeric_max<u_index>::value&&(MAX_SUBQUEUE_SIZE==0||MAX_SUBQUEUE_SIZE-BLOCK_SIZE<currentTailIndex-head))){
							return false;
						}
#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
						debug::DebugLock lock(mutex);
#endif
						// Find out where we'll be inserting this block in the block index
						BlockIndexEntry *idxEntry;
						if(!insert_block_index_entry<allocMode>(idxEntry, currentTailIndex)){
							return false;
						}

						// Get ahold of a new block
						auto newBlock=this->parent->ConcurrentQueue::template requisition_block<allocMode>();
						if(newBlock==nullptr){
							rewind_block_index_tail();
							idxEntry->value.store(nullptr, std::memory_order_relaxed);
							return false;
						}
#ifdef MCDBGQ_TRACKMEM
						newBlock->owner = this;
#endif
						newBlock->ConcurrentQueue::Block::template reset_empty<implicit_context>();
						MOODYCAMEL_CONSTEXPR_IF(!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new(static_cast<T *>(nullptr)) T(std::forward<U>(element)))){
							// May throw, try to insert now before we publish the fact that we have this new block
							MOODYCAMEL_TRY{
								new((*newBlock)[currentTailIndex]) T(std::forward<U>(element));
							}MOODYCAMEL_CATCH (...){
								rewind_block_index_tail();
								idxEntry->value.store(nullptr, std::memory_order_relaxed);
								this->parent->add_block_to_free_list(newBlock);
								MOODYCAMEL_RETHROW;
							}
						}

						// Insert the new block into the index
						idxEntry->value.store(newBlock, std::memory_order_relaxed);
						this->tailBlock=newBlock;
						MOODYCAMEL_CONSTEXPR_IF(!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new(static_cast<T *>(nullptr)) T(std::forward<U>(element)))){
							this->tailIndex.store(newTailIndex, std::memory_order_release);
							return true;
						}
					}

					// Enqueue
					new((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element));
					this->tailIndex.store(newTailIndex, std::memory_order_release);
					return true;
				}
				template<typename U>
				bool dequeue(U &element){
					// See ExplicitProducer::dequeue for rationale and explanation
					index_t tail=this->tailIndex.load(std::memory_order_relaxed);
					index_t overcommit=this->dequeueOvercommit.load(std::memory_order_relaxed);
					if(details::circular_less_than<index_t>(this->dequeueOptimisticCount.load(std::memory_order_relaxed)-overcommit, tail)){
						std::atomic_thread_fence(std::memory_order_acquire);
						index_t myDequeueCount=this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed);
						tail=this->tailIndex.load(std::memory_order_acquire);
						if((details::likely)(details::circular_less_than<index_t>(myDequeueCount-overcommit, tail))){
							index_t index=this->headIndex.fetch_add(1, std::memory_order_acq_rel);

							// Determine which block the element is in
							auto entry=get_block_index_entry_for_index(index);

							// Dequeue
							auto block=entry->value.load(std::memory_order_relaxed);
							auto &el=*((*block)[index]);
							if(!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element=std::move(el))){
#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
								// Note: Acquiring the mutex with every dequeue instead of only when a block
								// is released is very sub-optimal, but it is, after all, purely debug code.
								debug::DebugLock lock(producer->mutex);
#endif
								struct Guard{
									Block *block;
									index_t index;
									BlockIndexEntry *entry;
									ConcurrentQueue *parent;
									~Guard(){
										(*block)[index]->~T();
										if(block->ConcurrentQueue::Block::template set_empty<implicit_context>(index)){
											entry->value.store(nullptr, std::memory_order_relaxed);
											parent->add_block_to_free_list(block);
										}
									}
								}guard={block, index, entry, this->parent};
								element=std::move(el); // NOLINT
							}else{
								element=std::move(el); // NOLINT
								el.~T(); // NOLINT

								if(block->ConcurrentQueue::Block::template set_empty<implicit_context>(index)){
									{
#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
										debug::DebugLock lock(mutex);
#endif
										// Add the block back into the global free pool (and remove from block index)
										entry->value.store(nullptr, std::memory_order_relaxed);
									}
									this->parent->add_block_to_free_list(block);        // releases the above store
								}
							}
							return true;
						}else{
							this->dequeueOvercommit.fetch_add(1, std::memory_order_release);
						}
					}
					return false;
				}
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable: 4706)  // assignment within conditional expression
#endif
				template<AllocationMode allocMode, typename It>
				bool enqueue_bulk(It itemFirst, u_index count){
					// First, we need to make sure we have enough room to enqueue all of the elements;
					// this means pre-allocating blocks and putting them in the block index (but only if
					// all the allocations succeeded).

					// Note that the tailBlock we start off with may not be owned by us any more;
					// this happens if it was filled up exactly to the top (setting tailIndex to
					// the first index of the next block which is not yet allocated), then dequeued
					// completely (putting it on the free list) before we enqueue again.

					index_t startTailIndex=this->tailIndex.load(std::memory_order_relaxed);
					auto startBlock=this->tailBlock;
					Block *firstAllocatedBlock=nullptr;
					auto endBlock=this->tailBlock;

					// Figure out how many blocks we'll need to allocate, and do so
					u_index blockBaseDiff=((startTailIndex+count-1)&~static_cast<index_t>(BLOCK_SIZE-1))-((startTailIndex-1)&~static_cast<index_t>(BLOCK_SIZE-1));
					index_t currentTailIndex=(startTailIndex-1)&~static_cast<index_t>(BLOCK_SIZE-1);
					if(blockBaseDiff>0){
#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
						debug::DebugLock lock(mutex);
#endif
						do{
							blockBaseDiff-=static_cast<index_t>(BLOCK_SIZE);
							currentTailIndex+=static_cast<index_t>(BLOCK_SIZE);

							// Find out where we'll be inserting this block in the block index
							BlockIndexEntry *idxEntry=nullptr;  // initialization here unnecessary but compiler can't always tell
							Block *newBlock;
							bool indexInserted=false;
							auto head=this->headIndex.load(std::memory_order_relaxed);
							assert(!details::circular_less_than<index_t>(currentTailIndex, head));
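							// Full if writing this block would run into unconsumed elements at the head,
							// or if a MAX_SUBQUEUE_SIZE bound is configured and would be exceeded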
							bool full=!details::circular_less_than<index_t>(head, currentTailIndex+BLOCK_SIZE)||
									  (MAX_SUBQUEUE_SIZE!=details::const_numeric_max<u_index>::value&&(MAX_SUBQUEUE_SIZE==0||MAX_SUBQUEUE_SIZE-BLOCK_SIZE<currentTailIndex-head));
							if(full||
							   !(indexInserted=insert_block_index_entry<allocMode>(idxEntry, currentTailIndex))||
							   (newBlock=this->parent->ConcurrentQueue::template requisition_block<allocMode>())==nullptr){
								// Index allocation or block allocation failed; revert any other allocations
								// and index insertions done so far for this operation
								if(indexInserted){
									rewind_block_index_tail();
									idxEntry->value.store(nullptr, std::memory_order_relaxed);
								}
								currentTailIndex=(startTailIndex-1)&~static_cast<index_t>(BLOCK_SIZE-1);
								for(auto block=firstAllocatedBlock;block!=nullptr;block=block->next){
									currentTailIndex+=static_cast<index_t>(BLOCK_SIZE);
									idxEntry=get_block_index_entry_for_index(currentTailIndex);
									idxEntry->value.store(nullptr, std::memory_order_relaxed);
									rewind_block_index_tail();
								}
								this->parent->add_blocks_to_free_list(firstAllocatedBlock);
								this->tailBlock=startBlock;
								return false;
							}
#ifdef MCDBGQ_TRACKMEM
							newBlock->owner = this;
#endif
							newBlock->ConcurrentQueue::Block::template reset_empty<implicit_context>();
							newBlock->next=nullptr;

							// Insert the new block into the index
							idxEntry->value.store(newBlock, std::memory_order_relaxed);

							// Store the chain of blocks so that we can undo if later allocations fail,
							// and so that we can find the blocks when we do the actual enqueueing
							if((startTailIndex&static_cast<index_t>(BLOCK_SIZE-1))!=0||firstAllocatedBlock!=nullptr){
								assert(this->tailBlock!=nullptr);
								this->tailBlock->next=newBlock;
							}
							this->tailBlock=newBlock;
							endBlock=newBlock;
							firstAllocatedBlock=firstAllocatedBlock==nullptr?newBlock:firstAllocatedBlock;
						}while(blockBaseDiff>0);
					}

					// Enqueue, one block at a time
					index_t newTailIndex=startTailIndex+static_cast<index_t>(count);
					currentTailIndex=startTailIndex;
					this->tailBlock=startBlock;
					assert((startTailIndex&static_cast<index_t>(BLOCK_SIZE-1))!=0||firstAllocatedBlock!=nullptr||count==0);
					if((startTailIndex&static_cast<index_t>(BLOCK_SIZE-1))==0&&firstAllocatedBlock!=nullptr){
						this->tailBlock=firstAllocatedBlock;
					}
					while(true){
						index_t stopIndex=(currentTailIndex&~static_cast<index_t>(BLOCK_SIZE-1))+static_cast<index_t>(BLOCK_SIZE);
						if(details::circular_less_than<index_t>(newTailIndex, stopIndex)){
							stopIndex=newTailIndex;
						}
						MOODYCAMEL_CONSTEXPR_IF(MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new(static_cast<T *>(nullptr)) T(details::deref_noexcept(itemFirst)))){
							while(currentTailIndex!=stopIndex){
								new((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++);
							}
						}else{
							MOODYCAMEL_TRY{
								while(currentTailIndex!=stopIndex){
									new((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new(static_cast<T *>(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));
									++currentTailIndex;
									++itemFirst;
								}
							}MOODYCAMEL_CATCH (...){
								auto constructedStopIndex=currentTailIndex;
								auto lastBlockEnqueued=this->tailBlock;
								if(!details::is_trivially_destructible<T>::value){
									auto block=startBlock;
									if((startTailIndex&static_cast<index_t>(BLOCK_SIZE-1))==0){
										block=firstAllocatedBlock;
									}
									currentTailIndex=startTailIndex;
									while(true){
										stopIndex=(currentTailIndex&~static_cast<index_t>(BLOCK_SIZE-1))+static_cast<index_t>(BLOCK_SIZE);
										if(details::circular_less_than<index_t>(constructedStopIndex, stopIndex)){
											stopIndex=constructedStopIndex;
										}
										while(currentTailIndex!=stopIndex){
											(*block)[currentTailIndex++]->~T();
										}
										if(block==lastBlockEnqueued){
											break;
										}
										block=block->next;
									}
								}
								currentTailIndex=(startTailIndex-1)&~static_cast<index_t>(BLOCK_SIZE-1);
								for(auto block=firstAllocatedBlock;block!=nullptr;block=block->next){
									currentTailIndex+=static_cast<index_t>(BLOCK_SIZE);
									auto idxEntry=get_block_index_entry_for_index(currentTailIndex);
									idxEntry->value.store(nullptr, std::memory_order_relaxed);
									rewind_block_index_tail();
								}
								this->parent->add_blocks_to_free_list(firstAllocatedBlock);
								this->tailBlock=startBlock;
								MOODYCAMEL_RETHROW;
							}
						}
						if(this->tailBlock==endBlock){
							assert(currentTailIndex==newTailIndex);
							break;
						}
						this->tailBlock=this->tailBlock->next;
					}
					this->tailIndex.store(newTailIndex, std::memory_order_release);
					return true;
				}
#ifdef _MSC_VER
#pragma warning(pop)
#endif
				template<typename It>
				u_index dequeue_bulk(It &itemFirst, u_index max){
					auto tail=this->tailIndex.load(std::memory_order_relaxed);
					auto overcommit=this->dequeueOvercommit.load(std::memory_order_relaxed);
					auto desiredCount=static_cast<u_index>(tail-(this->dequeueOptimisticCount.load(std::memory_order_relaxed)-overcommit));
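					// desiredCount is optimistic: after reserving it below via dequeueOptimisticCount,
					// tailIndex is re-read and the count clamped to what is actually available, with any
					// shortfall credited back through dequeueOvercommit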
					if(details::circular_less_than<u_index>(0, desiredCount)){
						desiredCount=desiredCount<max?desiredCount:max;
						std::atomic_thread_fence(std::memory_order_acquire);
						auto myDequeueCount=this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed);
						tail=this->tailIndex.load(std::memory_order_acquire);
						auto actualCount=static_cast<u_index>(tail-(myDequeueCount-overcommit));
						if(details::circular_less_than<u_index>(0, actualCount)){
							actualCount=desiredCount<actualCount?desiredCount:actualCount;
							if(actualCount<desiredCount){
								this->dequeueOvercommit.fetch_add(desiredCount-actualCount, std::memory_order_release);
							}

							// Get the first index. Note that since there's guaranteed to be at least actualCount elements, this
							// will never exceed tail.
							auto firstIndex=this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel);

							// Iterate the blocks and dequeue
							auto index=firstIndex;
							BlockIndexHeader *localBlockIndex;
							auto indexIndex=get_block_index_index_for_index(index, localBlockIndex);
							do{
								auto blockStartIndex=index;
								index_t endIndex=(index&~static_cast<index_t>(BLOCK_SIZE-1))+static_cast<index_t>(BLOCK_SIZE);
								endIndex=details::circular_less_than<index_t>(firstIndex+static_cast<index_t>(actualCount), endIndex)?firstIndex+static_cast<index_t>(actualCount):endIndex;
								auto entry=localBlockIndex->index[indexIndex];
								auto block=entry->value.load(std::memory_order_relaxed);
								if(MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst)=std::move((*(*block)[index])))){
									while(index!=endIndex){
										auto &el=*((*block)[index]);
										*itemFirst++=std::move(el);
										el.~T();
										++index;
									}
								}else{
									MOODYCAMEL_TRY{
										while(index!=endIndex){
											auto &el=*((*block)[index]);
											*itemFirst=std::move(el);
											++itemFirst;
											el.~T();
											++index;
										}
									}MOODYCAMEL_CATCH (...){
										do{
											entry=localBlockIndex->index[indexIndex];
											block=entry->value.load(std::memory_order_relaxed);
											while(index!=endIndex){
												(*block)[index++]->~T();
											}
											if(block->ConcurrentQueue::Block::template set_many_empty<implicit_context>(blockStartIndex, static_cast<u_index>(endIndex-blockStartIndex))){
#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
												debug::DebugLock lock(mutex);
#endif
												entry->value.store(nullptr, std::memory_order_relaxed);
												this->parent->add_block_to_free_list(block);
											}
											indexIndex=(indexIndex+1)&(localBlockIndex->capacity-1);
											blockStartIndex=index;
											endIndex=(index&~static_cast<index_t>(BLOCK_SIZE-1))+static_cast<index_t>(BLOCK_SIZE);
											endIndex=details::circular_less_than<index_t>(firstIndex+static_cast<index_t>(actualCount), endIndex)?firstIndex+static_cast<index_t>(actualCount):endIndex;
										}while(index!=firstIndex+actualCount);
										MOODYCAMEL_RETHROW;
									}
								}
								if(block->ConcurrentQueue::Block::template set_many_empty<implicit_context>(blockStartIndex, static_cast<u_index>(endIndex-blockStartIndex))){
									{
#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
										debug::DebugLock lock(mutex);
#endif
										// Note that the set_many_empty above did a release, meaning that anybody who acquires the block
										// we're about to free can use it safely since our writes (and reads!) will have happened-before then.
										entry->value.store(nullptr, std::memory_order_relaxed);
									}
									this->parent->add_block_to_free_list(block);        // releases the above store
								}
								indexIndex=(indexIndex+1)&(localBlockIndex->capacity-1);
							}while(index!=firstIndex+actualCount);
							return actualCount;
						}else{
							this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release);
						}
					}
					return 0;
				}
				private:
				// The block size must be > 1, so any number with the low bit set is an invalid block base index
				static const index_t INVALID_BLOCK_BASE=1;
				struct BlockIndexEntry{
					std::atomic<index_t> key;
					std::atomic<Block *> value;
				};
				struct BlockIndexHeader{
					u_index capacity;
					std::atomic<u_index> tail;
					BlockIndexEntry *entries;
					BlockIndexEntry **index;
					BlockIndexHeader *prev;
				};
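				// The block index is a circular array of entry pointers; when it grows, the new
				// index copies the old index's pointers (see new_block_index), so entries for
				// blocks that are still in use stay reachable without being moved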
				template<AllocationMode allocMode>
				inline bool insert_block_index_entry(BlockIndexEntry *&idxEntry, index_t blockStartIndex){
					auto localBlockIndex=blockIndex.load(std::memory_order_relaxed);        // We're the only writer thread, relaxed is OK
					if(localBlockIndex==nullptr){
						return false;  // this can happen if new_block_index failed in the constructor
					}
					u_index newTail=(localBlockIndex->tail.load(std::memory_order_relaxed)+1)&(localBlockIndex->capacity-1);
					idxEntry=localBlockIndex->index[newTail];
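					// The next slot is free to reuse if it was never filled, or if its block has
					// since been released (the dequeuer clears `value` when freeing the block)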
					if(idxEntry->key.load(std::memory_order_relaxed)==INVALID_BLOCK_BASE||idxEntry->value.load(std::memory_order_relaxed)==nullptr){
						idxEntry->key.store(blockStartIndex, std::memory_order_relaxed);
						localBlockIndex->tail.store(newTail, std::memory_order_release);
						return true;
					}

					// No room in the old block index, try to allocate another one!
					MOODYCAMEL_CONSTEXPR_IF(allocMode==CannotAlloc){
						return false;
					}else if(!new_block_index()){
						return false;
					}else{
						localBlockIndex=blockIndex.load(std::memory_order_relaxed);
						newTail=(localBlockIndex->tail.load(std::memory_order_relaxed)+1)&(localBlockIndex->capacity-1);
						idxEntry=localBlockIndex->index[newTail];
						assert(idxEntry->key.load(std::memory_order_relaxed)==INVALID_BLOCK_BASE);
						idxEntry->key.store(blockStartIndex, std::memory_order_relaxed);
						localBlockIndex->tail.store(newTail, std::memory_order_release);
						return true;
					}
				}
				inline void rewind_block_index_tail(){
					auto localBlockIndex=blockIndex.load(std::memory_order_relaxed);
					localBlockIndex->tail.store((localBlockIndex->tail.load(std::memory_order_relaxed)-1)&(localBlockIndex->capacity-1), std::memory_order_relaxed);
				}
				inline BlockIndexEntry *get_block_index_entry_for_index(index_t index) const{
					BlockIndexHeader *localBlockIndex;
					auto idx=get_block_index_index_for_index(index, localBlockIndex);
					return localBlockIndex->index[idx];
				}
				inline u_index get_block_index_index_for_index(index_t index, BlockIndexHeader *&localBlockIndex) const{
#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
					debug::DebugLock lock(mutex);
#endif
					index&=~static_cast<index_t>(BLOCK_SIZE-1);
					localBlockIndex=blockIndex.load(std::memory_order_acquire);
					auto tail=localBlockIndex->tail.load(std::memory_order_acquire);
					auto tailBase=localBlockIndex->index[tail]->key.load(std::memory_order_relaxed);
					assert(tailBase!=INVALID_BLOCK_BASE);
					// Note: Must use division instead of shift because the index may wrap around, causing a negative
					// offset, whose negativity we want to preserve
					auto offset=static_cast<u_index>(static_cast<typename std::make_signed<index_t>::type>(index-tailBase)/static_cast<typename std::make_signed<index_t>::type>(BLOCK_SIZE));
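					// e.g. with BLOCK_SIZE == 32, tailBase == 96 and index == 32: offset == -2, so idx
					// walks two slots backwards from the tail (modulo capacity via the mask below)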
					u_index idx=(tail+offset)&(localBlockIndex->capacity-1);
					assert(localBlockIndex->index[idx]->key.load(std::memory_order_relaxed)==index&&localBlockIndex->index[idx]->value.load(std::memory_order_relaxed)!=nullptr);
					return idx;
				}
				bool new_block_index(){
					auto prev=blockIndex.load(std::memory_order_relaxed);
					u_index prevCapacity=prev==nullptr?0:prev->capacity;
					auto entryCount=prev==nullptr?nextBlockIndexCapacity:prevCapacity;
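					// Allocate the header, the entry array, and the (larger) pointer index in a single
					// raw allocation, with padding so each piece can be aligned below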
					auto raw=static_cast<char *>((Traits::malloc)(sizeof(BlockIndexHeader)+
																  std::alignment_of<BlockIndexEntry>::value-
																  1+
																  sizeof(BlockIndexEntry)*entryCount+
																  std::alignment_of<BlockIndexEntry *>::value-
																  1+
																  sizeof(BlockIndexEntry*)*nextBlockIndexCapacity));
					if(raw==nullptr){
						return false;
					}
					auto header=new(raw) BlockIndexHeader;
					auto entries=reinterpret_cast<BlockIndexEntry *>(details::align_for<BlockIndexEntry>(raw+sizeof(BlockIndexHeader)));
					auto index=reinterpret_cast<BlockIndexEntry **>(details::align_for<BlockIndexEntry *>(reinterpret_cast<char *>(entries)+sizeof(BlockIndexEntry)*entryCount));
					if(prev!=nullptr){
						auto prevTail=prev->tail.load(std::memory_order_relaxed);
						auto prevPos=prevTail;
						u_index i=0;
						do{
							prevPos=(prevPos+1)&(prev->capacity-1);
							index[i++]=prev->index[prevPos];
						}while(prevPos!=prevTail);
						assert(i==prevCapacity);
					}
					for(u_index i=0;i!=entryCount;++i){
						new(entries+i) BlockIndexEntry;
						entries[i].key.store(INVALID_BLOCK_BASE, std::memory_order_relaxed);
						index[prevCapacity+i]=entries+i;
					}
					header->prev=prev;
					header->entries=entries;
					header->index=index;
					header->capacity=nextBlockIndexCapacity;
					header->tail.store((prevCapacity-1)&(nextBlockIndexCapacity-1), std::memory_order_relaxed);
					blockIndex.store(header, std::memory_order_release);
					nextBlockIndexCapacity<<=1;
					return true;
				}
				private:
				u_index nextBlockIndexCapacity;
				std::atomic<BlockIndexHeader *> blockIndex;
#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
				public:
				details::ThreadExitListener threadExitListener;
				private:
#endif
#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
				public:
				ImplicitProducer *nextImplicitProducer;
				private:
#endif
#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
				mutable debug::DebugMutex mutex;
#endif
#ifdef MCDBGQ_TRACKMEM
				friend struct MemStats;
#endif
			};


			//////////////////////////////////
			// Block pool manipulation
			//////////////////////////////////

			void populate_initial_block_list(u_index blockCount){
				initialBlockPoolSize=blockCount;
				if(initialBlockPoolSize==0){
					initialBlockPool=nullptr;
					return;
				}
				initialBlockPool=create_array<Block>(blockCount);
				if(initialBlockPool==nullptr){
					initialBlockPoolSize=0;
				}
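				// Pool blocks are marked as not dynamically allocated so that
				// add_block_to_free_list recycles them instead of destroying them individually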
				for(u_index i=0;i<initialBlockPoolSize;++i){
					initialBlockPool[i].dynamicallyAllocated=false;
				}
			}
			inline Block *try_get_block_from_initial_pool(){
				if(initialBlockPoolIndex.load(std::memory_order_relaxed)>=initialBlockPoolSize){
					return nullptr;
				}
				auto index=initialBlockPoolIndex.fetch_add(1, std::memory_order_relaxed);
				return index<initialBlockPoolSize?(initialBlockPool+index):nullptr;
			}
			inline void add_block_to_free_list(Block *block){
#ifdef MCDBGQ_TRACKMEM
				block->owner = nullptr;
#endif
				if(!Traits::RECYCLE_ALLOCATED_BLOCKS&&block->dynamicallyAllocated){
					destroy(block);
				}else{
					freeList.add(block);
				}
			}
			inline void add_blocks_to_free_list(Block *block){
				while(block!=nullptr){
					auto next=block->next;
					add_block_to_free_list(block);
					block=next;
				}
			}
			inline Block *try_get_block_from_free_list(){
				return freeList.try_get();
			}
			// Gets a free block from one of the memory pools, or allocates a new one (if applicable)
			template<AllocationMode canAlloc>
			Block *requisition_block(){
				auto block=try_get_block_from_initial_pool();
				if(block!=nullptr){
					return block;
				}
				block=try_get_block_from_free_list();
				if(block!=nullptr){
					return block;
				}
				MOODYCAMEL_CONSTEXPR_IF(canAlloc==CanAlloc){
					return create<Block>();
				}else{
					return nullptr;
				}
			}
#ifdef MCDBGQ_TRACKMEM
			public:
			struct MemStats {
				u_index allocatedBlocks;
				u_index usedBlocks;
				u_index freeBlocks;
				u_index ownedBlocksExplicit;
				u_index ownedBlocksImplicit;
				u_index implicitProducers;
				u_index explicitProducers;
				u_index elementsEnqueued;
				u_index blockClassBytes;
				u_index queueClassBytes;
				u_index implicitBlockIndexBytes;
				u_index explicitBlockIndexBytes;

				friend class ConcurrentQueue;

				private:
				static MemStats getFor(ConcurrentQueue* q)
				{
					MemStats stats = { 0 };

					stats.elementsEnqueued = q->size_approx();

					auto block = q->freeList.head_unsafe();
					while (block != nullptr) {
						++stats.allocatedBlocks;
						++stats.freeBlocks;
						block = block->freeListNext.load(std::memory_order_relaxed);
					}

					for (auto ptr = q->producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
						bool implicit = dynamic_cast<ImplicitProducer*>(ptr) != nullptr;
						stats.implicitProducers += implicit ? 1 : 0;
						stats.explicitProducers += implicit ? 0 : 1;

						if (implicit) {
							auto prod = static_cast<ImplicitProducer*>(ptr);
							stats.queueClassBytes += sizeof(ImplicitProducer);
							auto head = prod->headIndex.load(std::memory_order_relaxed);
							auto tail = prod->tailIndex.load(std::memory_order_relaxed);
							auto hash = prod->blockIndex.load(std::memory_order_relaxed);
							if (hash != nullptr) {
								for (u_index i = 0; i != hash->capacity; ++i) {
									if (hash->index[i]->key.load(std::memory_order_relaxed) != ImplicitProducer::INVALID_BLOCK_BASE && hash->index[i]->value.load(std::memory_order_relaxed) != nullptr) {
										++stats.allocatedBlocks;
										++stats.ownedBlocksImplicit;
									}
								}
								stats.implicitBlockIndexBytes += hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry);
								for (; hash != nullptr; hash = hash->prev) {
									stats.implicitBlockIndexBytes += sizeof(typename ImplicitProducer::BlockIndexHeader) + hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry*);
								}
							}
							for (; details::circular_less_than<index_t>(head, tail); head += BLOCK_SIZE) {
								//auto block = prod->get_block_index_entry_for_index(head);
								++stats.usedBlocks;
							}
						}
						else {
							auto prod = static_cast<ExplicitProducer*>(ptr);
							stats.queueClassBytes += sizeof(ExplicitProducer);
							auto tailBlock = prod->tailBlock;
							bool wasNonEmpty = false;
							if (tailBlock != nullptr) {
								auto block = tailBlock;
								do {
									++stats.allocatedBlocks;
									if (!block->ConcurrentQueue::Block::template is_empty<explicit_context>() || wasNonEmpty) {
										++stats.usedBlocks;
										wasNonEmpty = wasNonEmpty || block != tailBlock;
									}
									++stats.ownedBlocksExplicit;
									block = block->next;
								} while (block != tailBlock);
							}
							auto index = prod->blockIndex.load(std::memory_order_relaxed);
							while (index != nullptr) {
								stats.explicitBlockIndexBytes += sizeof(typename ExplicitProducer::BlockIndexHeader) + index->size * sizeof(typename ExplicitProducer::BlockIndexEntry);
								index = static_cast<typename ExplicitProducer::BlockIndexHeader*>(index->prev);
							}
						}
					}

					auto freeOnInitialPool = q->initialBlockPoolIndex.load(std::memory_order_relaxed) >= q->initialBlockPoolSize ? 0 : q->initialBlockPoolSize - q->initialBlockPoolIndex.load(std::memory_order_relaxed);
					stats.allocatedBlocks += freeOnInitialPool;
					stats.freeBlocks += freeOnInitialPool;

					stats.blockClassBytes = sizeof(Block) * stats.allocatedBlocks;
					stats.queueClassBytes += sizeof(ConcurrentQueue);

					return stats;
				}
			};

			// For debugging only. Not thread-safe.
			MemStats getMemStats()
			{
				return MemStats::getFor(this);
			}
			private:
			friend struct MemStats;
#endif


			//////////////////////////////////
			// Producer list manipulation
			//////////////////////////////////

			ProducerBase *recycle_or_create_producer(bool isExplicit){
#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
				debug::DebugLock lock(implicitProdMutex);
#endif
				// Try to re-use one first
				for(auto ptr=producerListTail.load(std::memory_order_acquire);ptr!=nullptr;ptr=ptr->next_prod()){
					if(ptr->inactive.load(std::memory_order_relaxed)&&ptr->isExplicit==isExplicit){
						bool expected=true;
						if(ptr->inactive.compare_exchange_strong(expected, /* desired */ false, std::memory_order_acquire, std::memory_order_relaxed)){
							// We caught one! It's been marked as activated, the caller can have it
							return ptr;
						}
					}
				}
				return add_producer(isExplicit?static_cast<ProducerBase *>(create<ExplicitProducer>(this)):create<ImplicitProducer>(this));
			}
			ProducerBase *add_producer(ProducerBase *producer){
				// Handle failed memory allocation
				if(producer==nullptr){
					return nullptr;
				}
				producerCount.fetch_add(1, std::memory_order_relaxed);

				// Add it to the lock-free list
				auto prevTail=producerListTail.load(std::memory_order_relaxed);
				do{
					producer->next=prevTail;
				}while(!producerListTail.compare_exchange_weak(prevTail, producer, std::memory_order_release, std::memory_order_relaxed));
#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
				if (producer->isExplicit) {
					auto prevTailExplicit = explicitProducers.load(std::memory_order_relaxed);
					do {
						static_cast<ExplicitProducer*>(producer)->nextExplicitProducer = prevTailExplicit;
					} while (!explicitProducers.compare_exchange_weak(prevTailExplicit, static_cast<ExplicitProducer*>(producer), std::memory_order_release, std::memory_order_relaxed));
				}
				else {
					auto prevTailImplicit = implicitProducers.load(std::memory_order_relaxed);
					do {
						static_cast<ImplicitProducer*>(producer)->nextImplicitProducer = prevTailImplicit;
					} while (!implicitProducers.compare_exchange_weak(prevTailImplicit, static_cast<ImplicitProducer*>(producer), std::memory_order_release, std::memory_order_relaxed));
				}
#endif
				return producer;
			}
			void reown_producers(){
				// After another instance is moved-into/swapped-with this one, all the
				// producers we stole still think their parents are the other queue.
				// So fix them up!
				for(auto ptr=producerListTail.load(std::memory_order_relaxed);ptr!=nullptr;ptr=ptr->next_prod()){
					ptr->parent=this;
				}
			}


			//////////////////////////////////
			// Implicit producer hash
			//////////////////////////////////

			struct ImplicitProducerKVP{
				std::atomic<details::thread_id_t> key;
				ImplicitProducer *value;        // No need for atomicity since it's only read by the thread that sets it in the first place

				ImplicitProducerKVP(): value(nullptr){
				}
				ImplicitProducerKVP(ImplicitProducerKVP &&other) MOODYCAMEL_NOEXCEPT{
					key.store(other.key.load(std::memory_order_relaxed), std::memory_order_relaxed);
					value=other.value;
				}
				inline ImplicitProducerKVP &operator=(ImplicitProducerKVP &&other) MOODYCAMEL_NOEXCEPT{
					swap(other);
					return *this;
				}
				inline void swap(ImplicitProducerKVP &other) MOODYCAMEL_NOEXCEPT{
					if(this!=&other){
						details::swap_relaxed(key, other.key);
						std::swap(value, other.value);
					}
				}
			};
			template<typename XT, typename XTraits>
			friend void moodycamel::swap(typename ConcurrentQueue<XT, XTraits>::ImplicitProducerKVP &, typename ConcurrentQueue<XT, XTraits>::ImplicitProducerKVP &) MOODYCAMEL_NOEXCEPT;
			struct ImplicitProducerHash{
				u_index capacity;
				ImplicitProducerKVP *entries;
				ImplicitProducerHash *prev;
			};
			inline void populate_initial_implicit_producer_hash(){
				MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE==0){
					return;
				}else{
					implicitProducerHashCount.store(0, std::memory_order_relaxed);
					auto hash=&initialImplicitProducerHash;
					hash->capacity=INITIAL_IMPLICIT_PRODUCER_HASH_SIZE;
					hash->entries=&initialImplicitProducerHashEntries[0];
					for(u_index i=0;i!=INITIAL_IMPLICIT_PRODUCER_HASH_SIZE;++i){
						initialImplicitProducerHashEntries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed);
					}
					hash->prev=nullptr;
					implicitProducerHash.store(hash, std::memory_order_relaxed);
				}
			}
			void swap_implicit_producer_hashes(ConcurrentQueue &other){
				MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE==0){
					return;
				}else{
					// Swap (assumes our implicit producer hash is initialized)
					initialImplicitProducerHashEntries.swap(other.initialImplicitProducerHashEntries);
					initialImplicitProducerHash.entries=&initialImplicitProducerHashEntries[0];
					other.initialImplicitProducerHash.entries=&other.initialImplicitProducerHashEntries[0];
					details::swap_relaxed(implicitProducerHashCount, other.implicitProducerHashCount);
					details::swap_relaxed(implicitProducerHash, other.implicitProducerHash);
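					// The embedded initial hashes can't be swapped by pointer, so patch up any chain
					// that still references the other queue's initial hash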
					if(implicitProducerHash.load(std::memory_order_relaxed)==&other.initialImplicitProducerHash){
						implicitProducerHash.store(&initialImplicitProducerHash, std::memory_order_relaxed);
					}else{
						ImplicitProducerHash *hash;
						for(hash=implicitProducerHash.load(std::memory_order_relaxed);hash->prev!=&other.initialImplicitProducerHash;hash=hash->prev){
							continue;
						}
						hash->prev=&initialImplicitProducerHash;
					}
					if(other.implicitProducerHash.load(std::memory_order_relaxed)==&initialImplicitProducerHash){
						other.implicitProducerHash.store(&other.initialImplicitProducerHash, std::memory_order_relaxed);
					}else{
						ImplicitProducerHash *hash;
						for(hash=other.implicitProducerHash.load(std::memory_order_relaxed);hash->prev!=&initialImplicitProducerHash;hash=hash->prev){
							continue;
						}
						hash->prev=&other.initialImplicitProducerHash;
					}
				}
			}
			// Only fails (returns nullptr) if memory allocation fails
			ImplicitProducer *get_or_add_implicit_producer(){
				// Note that since the data is essentially thread-local (key is thread ID),
				// there's a reduced need for fences (memory ordering is already consistent
				// for any individual thread), except for the current table itself.

				// Start by looking for the thread ID in the current and all previous hash tables.
				// If it's not found, it must not be in there yet, since this same thread would
				// have added it previously to one of the tables that we traversed.

				// Code and algorithm adapted from http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table

#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
				debug::DebugLock lock(implicitProdMutex);
#endif
				auto id=details::thread_id();
				auto hashedId=details::hash_thread_id(id);
				auto mainHash=implicitProducerHash.load(std::memory_order_acquire);
				assert(mainHash!=nullptr);  // silence clang-tidy and MSVC warnings (hash cannot be nullptr)
				for(auto hash=mainHash;hash!=nullptr;hash=hash->prev){
					// Look for the id in this hash
					auto index=hashedId;
					while(true){        // Not an infinite loop because at least one slot is free in the hash table
						index&=hash->capacity-1u;
						auto probedKey=hash->entries[index].key.load(std::memory_order_relaxed);
						if(probedKey==id){
							// Found it! If we had to search several hashes deep, though, we should lazily add it
							// to the current main hash table to avoid the extended search next time.
							// Note there's guaranteed to be room in the current hash table since every subsequent
							// table implicitly reserves space for all previous tables (there's only one
							// implicitProducerHashCount).
							auto value=hash->entries[index].value;
							if(hash!=mainHash){
								index=hashedId;
								while(true){
									index&=mainHash->capacity-1u;
									auto empty=details::invalid_thread_id;
#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
									auto reusable=details::invalid_thread_id2;
									if(mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_seq_cst, std::memory_order_relaxed)||
									   mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_seq_cst, std::memory_order_relaxed)){
#else
									if(mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_seq_cst, std::memory_order_relaxed)){
#endif
										mainHash->entries[index].value=value;
										break;
									}
									++index;
								}
							}
							return value;
						}
						if(probedKey==details::invalid_thread_id){
							break;        // Not in this hash table
						}
						++index;
					}
				}

				// Insert!
				auto newCount=1+implicitProducerHashCount.fetch_add(1, std::memory_order_relaxed);
				while(true){
					// NOLINTNEXTLINE(clang-analyzer-core.nullptrDereference)
					if(newCount>=(mainHash->capacity>>1)&&!implicitProducerHashResizeInProgress.test_and_set(std::memory_order_acquire)){
						// We've acquired the resize lock, try to allocate a bigger hash table.
						// Note the acquire fence synchronizes with the release fence at the end of this block, and hence when
						// we reload implicitProducerHash it must be the most recent version (it only gets changed within this
						// locked block).
						mainHash=implicitProducerHash.load(std::memory_order_acquire);
						if(newCount>=(mainHash->capacity>>1)){
							u_index newCapacity=mainHash->capacity<<1;
							while(newCount>=(newCapacity>>1)){
								newCapacity<<=1;
							}
							auto raw=static_cast<char *>((Traits::malloc)(sizeof(ImplicitProducerHash)+std::alignment_of<ImplicitProducerKVP>::value-1+sizeof(ImplicitProducerKVP)*newCapacity));
							if(raw==nullptr){
								// Allocation failed
								implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed);
								implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
								return nullptr;
							}
							auto newHash=new(raw) ImplicitProducerHash;
							newHash->capacity=static_cast<u_index>(newCapacity);
							newHash->entries=reinterpret_cast<ImplicitProducerKVP *>(details::align_for<ImplicitProducerKVP>(raw+sizeof(ImplicitProducerHash)));
							for(u_index i=0;i!=newCapacity;++i){
								new(newHash->entries+i) ImplicitProducerKVP;
								newHash->entries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed);
							}
							newHash->prev=mainHash;
							implicitProducerHash.store(newHash, std::memory_order_release);
							implicitProducerHashResizeInProgress.clear(std::memory_order_release);
							mainHash=newHash;
						}else{
							implicitProducerHashResizeInProgress.clear(std::memory_order_release);
						}
					}

					// If it's < three-quarters full, add to the old one anyway so that we don't have to wait for the next table
					// to finish being allocated by another thread (and if we just finished allocating above, the condition will
					// always be true)
					if(newCount<(mainHash->capacity>>1)+(mainHash->capacity>>2)){
						auto producer=static_cast<ImplicitProducer *>(recycle_or_create_producer(false));
						if(producer==nullptr){
							implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed);
							return nullptr;
						}
#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
						producer->threadExitListener.callback=&ConcurrentQueue::implicit_producer_thread_exited_callback;
						producer->threadExitListener.userData=producer;
						details::ThreadExitNotifier::subscribe(&producer->threadExitListener);
#endif
						auto index=hashedId;
						while(true){
							index&=mainHash->capacity-1u;
							auto empty=details::invalid_thread_id;
#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
							auto reusable=details::invalid_thread_id2;
							if(mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_seq_cst, std::memory_order_relaxed)){
								implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed);  // already counted as a used slot
								mainHash->entries[index].value=producer;
								break;
							}
#endif
							if(mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_seq_cst, std::memory_order_relaxed)){
								mainHash->entries[index].value=producer;
								break;
							}
							++index;
						}
						return producer;
					}

					// Hmm, the old hash is quite full and somebody else is busy allocating a new one.
					// We need to wait for the allocating thread to finish (if it succeeds, we add, if not,
					// we try to allocate ourselves).
					mainHash=implicitProducerHash.load(std::memory_order_acquire);
				}
			}
#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
			void implicit_producer_thread_exited(ImplicitProducer *producer){
				// Remove from hash
#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
				debug::DebugLock lock(implicitProdMutex);
#endif
				auto hash=implicitProducerHash.load(std::memory_order_acquire);
				assert(hash!=nullptr);        // The thread exit listener is only registered if we were added to a hash in the first place
				auto id=details::thread_id();
				auto hashedId=details::hash_thread_id(id);
				details::thread_id_t probedKey;

				// We need to traverse all the hashes just in case other threads aren't on the current one yet and are
				// trying to add an entry thinking there's a free slot (because they reused a producer)
				for(;hash!=nullptr;hash=hash->prev){
					auto index=hashedId;
					do{
						index&=hash->capacity-1u;
						probedKey=id;
						if(hash->entries[index].key.compare_exchange_strong(probedKey, details::invalid_thread_id2, std::memory_order_seq_cst, std::memory_order_relaxed)){
							break;
						}
						++index;
					}while(probedKey!=details::invalid_thread_id);        // Can happen if the hash has changed but we weren't put back in it yet, or if we weren't added to this hash in the first place
				}

				// Mark the queue as being recyclable
				producer->inactive.store(true, std::memory_order_release);
			}
			static void implicit_producer_thread_exited_callback(void *userData){
				auto producer=static_cast<ImplicitProducer *>(userData);
				auto queue=producer->parent;
				queue->implicit_producer_thread_exited(producer);
			}
#endif

			//////////////////////////////////
			// Utility functions
			//////////////////////////////////

			template<typename TAlign>
			static inline void *aligned_malloc(u_index size){
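				// For over-aligned types, over-allocate and stash the raw pointer immediately
				// before the aligned region so that aligned_free can recover and free it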
				MOODYCAMEL_CONSTEXPR_IF(std::alignment_of<TAlign>::value<=std::alignment_of<details::max_align_t>::value){
					return (Traits::malloc)(size);
				}else{
					u_index alignment=std::alignment_of<TAlign>::value;
					void *raw=(Traits::malloc)(size+alignment-1+sizeof(void *));
					if(!raw)
						return nullptr;
					char *ptr=details::align_for<TAlign>(reinterpret_cast<char *>(raw)+sizeof(void *));
					*(reinterpret_cast<void **>(ptr)-1)=raw;
					return ptr;
				}
			}
			template<typename TAlign>
			static inline void aligned_free(void *ptr){
				MOODYCAMEL_CONSTEXPR_IF(std::alignment_of<TAlign>::value<=std::alignment_of<details::max_align_t>::value){
					return (Traits::free)(ptr);
				}else{
					(Traits::free)(ptr?*(reinterpret_cast<void **>(ptr)-1):nullptr);
				}
			}
			template<typename U>
			static inline U *create_array(u_index count){
				assert(count>0);
				U *p=static_cast<U *>(aligned_malloc<U>(sizeof(U)*count));
				if(p==nullptr)
					return nullptr;
				for(u_index i=0;i!=count;++i)
					new(p+i) U();
				return p;
			}
			template<typename U>
			static inline void destroy_array(U *p, u_index count){
				if(p!=nullptr){
					assert(count>0);
					for(u_index i=count;i!=0;)
						(p+ --i)->~U();
				}
				aligned_free<U>(p);
			}
			template<typename U>
			static inline U *create(){
				void *p=aligned_malloc<U>(sizeof(U));
				return p!=nullptr?new(p) U:nullptr;
			}
			template<typename U, typename A1>
			static inline U *create(A1 &&a1){
				void *p=aligned_malloc<U>(sizeof(U));
				return p!=nullptr?new(p) U(std::forward<A1>(a1)):nullptr;
			}
			template<typename U>
			static inline void destroy(U *p){
				if(p!=nullptr)
					p->~U();
				aligned_free<U>(p);
			}
			private:
			std::atomic<ProducerBase *> producerListTail;
			std::atomic<std::uint32_t> producerCount;
			std::atomic<u_index> initialBlockPoolIndex;
			Block *initialBlockPool;
			u_index initialBlockPoolSize;
#ifndef MCDBGQ_USEDEBUGFREELIST
			FreeList<Block> freeList;
#else
			debug::DebugFreeList<Block> freeList;
#endif
			std::atomic<ImplicitProducerHash *> implicitProducerHash;
			std::atomic<u_index> implicitProducerHashCount;        // Number of slots logically used
			ImplicitProducerHash initialImplicitProducerHash;
			std::array<ImplicitProducerKVP, INITIAL_IMPLICIT_PRODUCER_HASH_SIZE> initialImplicitProducerHashEntries;
			std::atomic_flag implicitProducerHashResizeInProgress;
			std::atomic<std::uint32_t> nextExplicitConsumerId;
			std::atomic<std::uint32_t> globalExplicitConsumerOffset;
#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
			debug::DebugMutex implicitProdMutex;
#endif
#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
			std::atomic<ExplicitProducer*> explicitProducers;
			std::atomic<ImplicitProducer*> implicitProducers;
#endif
	};
	template<typename T, typename Traits>
	ProducerToken::ProducerToken(ConcurrentQueue<T, Traits> &queue)
			: producer(queue.recycle_or_create_producer(true)){
		if(producer!=nullptr){
			producer->token=this;
		}
	}
	template<typename T, typename Traits>
	ProducerToken::ProducerToken(BlockingConcurrentQueue<T, Traits> &queue)
			: producer(reinterpret_cast<ConcurrentQueue<T, Traits> *>(&queue)->recycle_or_create_producer(true)){
		if(producer!=nullptr){
			producer->token=this;
		}
	}
	template<typename T, typename Traits>
	ConsumerToken::ConsumerToken(ConcurrentQueue<T, Traits> &queue)
			: itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr){
		initialOffset=queue.nextExplicitConsumerId.fetch_add(1, std::memory_order_release);
		lastKnownGlobalOffset=static_cast<std::uint32_t>(-1);
	}
	template<typename T, typename Traits>
	ConsumerToken::ConsumerToken(BlockingConcurrentQueue<T, Traits> &queue)
			: itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr){
		initialOffset=reinterpret_cast<ConcurrentQueue<T, Traits> *>(&queue)->nextExplicitConsumerId.fetch_add(1, std::memory_order_release);
		lastKnownGlobalOffset=static_cast<std::uint32_t>(-1);
	}
	template<typename T, typename Traits>
	inline void swap(ConcurrentQueue<T, Traits> &a, ConcurrentQueue<T, Traits> &b) MOODYCAMEL_NOEXCEPT{
		a.swap(b);
	}
	inline void swap(ProducerToken &a, ProducerToken &b) MOODYCAMEL_NOEXCEPT{
		a.swap(b);
	}
	inline void swap(ConsumerToken &a, ConsumerToken &b) MOODYCAMEL_NOEXCEPT{
		a.swap(b);
	}
	template<typename T, typename Traits>
	inline void swap(typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP &a, typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP &b) MOODYCAMEL_NOEXCEPT{
		a.swap(b);
	}
}
#if defined(_MSC_VER)&&(!defined(_HAS_CXX17)||!_HAS_CXX17)
#pragma warning(pop)
#endif
#if defined(__GNUC__)&&!defined(__INTEL_COMPILER)
#pragma GCC diagnostic pop
#endif
